Telegraf+InfluxDB2+Grafana按所选择时间

2022-12-07 本文已影响0人 EvineDeng

Telegraf使用hddtemp插件记录硬盘温度；
InfluxDB2用来接收Telegraf采集到的数据；
Grafana用来数据展示。

`Telegraf`配置

# Configuration for telegraf agent
[agent]
  ## Default data collection interval for all inputs
  interval = "10s"
  ## Rounds collection interval to 'interval'
  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
  round_interval = true

  ## Telegraf will send metrics to outputs in batches of at most
  ## metric_batch_size metrics.
  ## This controls the size of writes that Telegraf sends to output plugins.
  metric_batch_size = 1000

  ## Maximum number of unwritten metrics per output.  Increasing this value
  ## allows for longer periods of output downtime without dropping metrics at the
  ## cost of higher maximum memory usage.
  metric_buffer_limit = 10000

  ## Collection jitter is used to jitter the collection by a random amount.
  ## Each plugin will sleep for a random time within jitter before collecting.
  ## This can be used to avoid many plugins querying things like sysfs at the
  ## same time, which can have a measurable effect on the system.
  collection_jitter = "0s"

  ## Default flushing interval for all outputs. Maximum flush_interval will be
  ## flush_interval + flush_jitter
  flush_interval = "10s"
  ## Jitter the flush interval by a random amount. This is primarily to avoid
  ## large write spikes for users running a large number of telegraf instances.
  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
  flush_jitter = "0s"

  ## By default or when set to "0s", precision will be set to the same
  ## timestamp order as the collection interval, with the maximum being 1s.
  ##   ie, when interval = "10s", precision will be "1s"
  ##       when interval = "250ms", precision will be "1ms"
  ## Precision will NOT be used for service inputs. It is up to each individual
  ## service input to set the timestamp at the appropriate precision.
  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
  precision = ""

  ## Log at debug level.
  # debug = false
  ## Log only error level messages.
  # quiet = false

  ## Log target controls the destination for logs and can be one of "file",
  ## "stderr" or, on Windows, "eventlog".  When set to "file", the output file
  ## is determined by the "logfile" setting.
  # logtarget = "file"

  ## Name of the file to be logged to when using the "file" logtarget.  If set to
  ## the empty string then logs are written to stderr.
  # logfile = ""

  ## The logfile will be rotated after the time interval specified.  When set
  ## to 0 no time based rotation is performed.  Logs are rotated only when
  ## written to, if there is no log activity rotation may be delayed.
  # logfile_rotation_interval = "0d"

  ## The logfile will be rotated when it becomes larger than the specified
  ## size.  When set to 0 no size based rotation is performed.
  # logfile_rotation_max_size = "0MB"

  ## Maximum number of rotated archives to keep, any older logs are deleted.
  ## If set to -1, no archives are removed.
  # logfile_rotation_max_archives = 5

  ## Pick a timezone to use when logging or type 'local' for local time.
  ## Example: America/Chicago
  # log_with_timezone = ""

  ## Override default hostname, if empty use os.Hostname()
  hostname = ""
  ## If set to true, do no set the "host" tag in the telegraf agent.
  omit_hostname = false
[[outputs.influxdb_v2]]
  ## The URLs of the InfluxDB cluster nodes.
  ##
  ## Multiple URLs can be specified for a single cluster, only ONE of the
  ## urls will be written to each interval.
  ##   ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
  urls = ["http://localhost:8086"]

  ## Token for authentication.
  token = "$INFLUX_TOKEN"

  ## Organization is the name of the organization you wish to write to; must exist.
  organization = "proxmox"

  ## Destination bucket to write into.
  bucket = "hddtemp"

  ## The value of this tag will be used to determine the bucket.  If this
  ## tag is not set the 'bucket' option is used as the default.
  # bucket_tag = ""

  ## If true, the bucket tag will not be added to the metric.
  # exclude_bucket_tag = false

  ## Timeout for HTTP messages.
  # timeout = "5s"

  ## Additional HTTP headers
  # http_headers = {"X-Special-Header" = "Special-Value"}

  ## HTTP Proxy override, if unset values the standard proxy environment
  ## variables are consulted to determine which proxy, if any, should be used.
  # http_proxy = "http://corporate.proxy:3128"

  ## HTTP User-Agent
  # user_agent = "telegraf"

  ## Content-Encoding for write request body, can be set to "gzip" to
  ## compress body or "identity" to apply no encoding.
  # content_encoding = "gzip"

  ## Enable or disable uint support for writing uints influxdb 2.0.
  # influx_uint_support = false

  ## Optional TLS Config for use on HTTP connections.
  # tls_ca = "/etc/telegraf/ca.pem"
  # tls_cert = "/etc/telegraf/cert.pem"
  # tls_key = "/etc/telegraf/key.pem"
  ## Use TLS but skip chain & host verification
  # insecure_skip_verify = false
# Monitor disks' temperatures using hddtemp
[[inputs.hddtemp]]
  ## By default, telegraf gathers temps data from all disks detected by the
  ## hddtemp.
  ##
  ## Only collect temps from the selected disks.
  ##
  ## A * as the device name will return the temperature values of all disks.
  ##
  address = "localhost:7634"
  #devices = ["sd*", "nvme*"]
  interval = "5m"
  flush_interval = "5m"

`InfluxDB2/Grafana`数据展示代码

import "date"
import "strings"
import "interpolate"

from(bucket: "hddtemp")
  |> range(
    start: date.truncate(
      t: v.timeRangeStart, 
      unit: if uint(v: v.windowPeriod) > 60000000000 then 1d else 1h,  // 根据所选择的时间范围来确定 date.truncate 到什么程度
      location: {zone: "Asia/Shanghai", offset: 0h}
    ),
    stop: v.timeRangeStop
  )
  |> filter(fn: (r) => r["_measurement"] == "hddtemp")
  |> filter(fn: (r) => r["_field"] == "temperature")
  |> filter(fn: (r) => r["host"] == "telegraf")
  |> filter(fn: (r) => r["_value"] > 0)
  |> map(fn: (r) => ({r with device: strings.trimPrefix(v: r.device, prefix: "ata-")}))   // 删除 device 的前缀
  |> map(fn: (r) => ({r with device: strings.trimPrefix(v: r.device, prefix: "nvme-")}))  // 删除 device 的前缀
  |> map(fn: (r) => ({r with _value: float(v: r._value)}))  // 后面的线性插值需要 _value 为 float 类型
  |> keep(columns: ["_value", "_start", "_stop", "_time", "device"])  // 只保留用来作图的数据
  |> interpolate.linear(every: v.windowPeriod)  // 线性插值
  |> aggregateWindow(
    every: 
      if uint(v: v.windowPeriod) <= 60000000000 then //1天以内取1个像素所代表的时间范围的20倍为最小绘制单元
        date.scale(d: v.windowPeriod, n: 20)
      else if uint(v: v.windowPeriod) > 60000000000 and uint(v: v.windowPeriod) <= 120000000000 then //1-2天取40分钟为最小绘制单元
        40m
      else if uint(v: v.windowPeriod) > 120000000000 and uint(v: v.windowPeriod) <= 600000000000 then //2-7天取2小时为最小绘制单元
        2h
      else if uint(v: v.windowPeriod) > 600000000000 and uint(v: v.windowPeriod) <= 1800000000000 then //7-30天取6小时为最小绘制单元
        6h
      else if uint(v: v.windowPeriod) > 1800000000000 and uint(v: v.windowPeriod) <= 7200000000000 then //30-90天取12小时为最小绘制单元
        12h
      else //90天以上取1天为最小绘制单元
        1d,
    fn: mean, 
    timeSrc: "_start",
    location: {zone: "Asia/Shanghai", offset: 0h}
  )

注：nvme磁盘无法通过hddtemp插件获取温度，需要自行通过脚本使用 influxdb2-client 自行写入。