监控监控

Prometheus（普罗米修斯）监控与钉钉报警

2022-03-25  本文已影响0人  王宣成
vim /tmp/prometheus.yml
# Rule files loaded by the Prometheus server.
rule_files:
  - '/etc/prometheus/rules.yml'

# Alertmanager endpoint that fired alerts are sent to.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '101.43.104.115:9093'

# Scrape jobs; every job has one statically configured target.
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - '101.43.104.115:9090'

  - job_name: 'node'
    static_configs:
      - targets:
          - '101.43.104.115:9100'

  - job_name: 'alertmanager'
    static_configs:
      - targets:
          - '101.43.104.115:9093'

  - job_name: 'docker'
    static_configs:
      - targets:
          - '101.43.104.115:8080'
vim /tmp/rules.yml
# Host-level alerting rules.
groups:
- name: Host
  rules:
  # Fires when used memory (total minus free, buffers and cached) stays above
  # 80% of total memory for one minute. $value is a percentage.
  - alert: HostMemory Usage
    expr: (node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / node_memory_MemTotal_bytes * 100 > 80
    for: 1m
    labels:
      name: Memory
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: " {{ or $labels.appname $labels.instance }} "
      description: "主机内存使用率超过80%."
      value: "{{ $value }}"
  # Fires when the non-idle CPU share of an instance stays above 80% for one
  # minute: per-mode rates are averaged across cores, then summed over modes.
  # The result is scaled to percent so $value matches the description.
  - alert: HostCPU Usage
    expr: sum by (instance, appname) (avg without (cpu) (irate(node_cpu_seconds_total{mode!='idle'}[5m]))) * 100 > 80
    for: 1m
    labels:
      name: CPU
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: " {{ or $labels.appname $labels.instance }} "
      description: "主机CPU使用率超过80%."
      value: "{{ $value }}"
  # Fires when node_load5 stays above 4 for one minute.
  - alert: HostLoad
    expr: node_load5 > 4
    for: 1m
    labels:
      name: Load
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: "{{ or $labels.appname $labels.instance }} "
      description: " 主机负载5分钟超过4."
      value: "{{ $value }}"
  # Fires when a filesystem's used share (1 - free/size) stays above 80% for
  # one minute. The ratio is scaled to percent so that the "%" suffix in the
  # value annotation is correct.
  - alert: HostFilesystem Usage
    expr: (1 - node_filesystem_free_bytes / node_filesystem_size_bytes) * 100 > 80
    for: 1m
    labels:
      name: Disk
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: " {{ or $labels.appname $labels.instance }} "
      description: " 主机 [ {{ $labels.mountpoint }} ]分区使用超过80%."
      value: "{{ $value }}%"
  # Fires when a disk completes more than 10 writes per second for one minute.
  # The selector targets the `node` scrape job (the previous job=~"Host"
  # matcher selected no series). The 5m window leaves room for at least two
  # samples, which irate() needs, even at a one-minute scrape interval.
  - alert: HostDiskio
    expr: irate(node_disk_writes_completed_total{job="node"}[5m]) > 10
    for: 1m
    labels:
      name: Diskio
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: " {{ or $labels.appname $labels.instance }} "
      description: " 主机 [{{ $labels.device }}]磁盘写入IO负载较高(超过10 IOPS)."
      value: "{{ $value }}iops"
  # Fires when a network device receives more than 3 MiB/s for one minute.
  # Devices matching the exclusion regex (loopback, bonds, bridges, veth, ...)
  # are ignored. bytes/s divided by 1048576 is MiB/s, so the annotations use
  # that unit.
  - alert: Network_receive
    expr: irate(node_network_receive_bytes_total{device!~"lo|bond[0-9]|cbr[0-9]|veth.*|virbr.*|ovs-system"}[5m]) / 1048576 > 3
    for: 1m
    labels:
      name: Network_receive
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: " {{ or $labels.appname $labels.instance }} "
      description: " 主机 [{{ $labels.device }}] 网卡5分钟平均接收流量超过3MiB/s."
      value: "{{ $value }}MiB/s"
  # Fires when a network device transmits more than 3 MiB/s for one minute.
  # Devices matching the exclusion regex (loopback, bonds, bridges, veth, ...)
  # are ignored. bytes/s divided by 1048576 is MiB/s, so the annotations use
  # that unit.
  - alert: Network_transmit
    expr: irate(node_network_transmit_bytes_total{device!~"lo|bond[0-9]|cbr[0-9]|veth.*|virbr.*|ovs-system"}[5m]) / 1048576 > 3
    for: 1m
    labels:
      name: Network_transmit
      severity: Warning
    annotations:
      # Use the `appname` label when the target has one, else the instance,
      # so the summary is never blank.
      summary: " {{ or $labels.appname $labels.instance }} "
      description: " 主机 [{{ $labels.device }}] 网卡5分钟内平均发送流量超过3MiB/s."
      value: "{{ $value }}MiB/s"


vim /tmp/alertmanager.yml
# Alertmanager configuration: all alerts are delivered to a single webhook.
global:
  resolve_timeout: 5m

# The only receiver; resolved notifications are sent as well.
receivers:
  - name: 'webhook'
    webhook_configs:
      - send_resolved: true
        url: 'http://101.43.104.115:8060/dingtalk/webhook1/send'

# Routing tree: alerts are grouped by alert name.
route:
  receiver: 'webhook'
  group_by:
    - 'alertname'
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 5m
  routes:
    # Child route with no matchers; it sets its own group_wait of 10s.
    - receiver: 'webhook'
      group_wait: 10s
# Start Alertmanager detached, publishing port 9093 and mounting the config
# file from /tmp into the container.
docker run \
  --detach \
  --name alertmanager \
  --publish 9093:9093 \
  --volume /tmp/alertmanager.yml:/etc/alertmanager/alertmanager.yml \
  prom/alertmanager
 # Start node-exporter detached on port 9100 with the host's /proc, /sys and /
 # mounted read-only into the container.
 # The filesystem exclusion uses --collector.filesystem.mount-points-exclude;
 # the older --collector.filesystem.ignored-mount-points spelling is deprecated
 # and may be rejected by the untagged (latest) image.
 # The regex is single-quoted so the shell never interprets the `$`.
 docker run -d --name node-exporter -p 9100:9100 \
   -v /proc:/host/proc:ro \
   -v /sys:/host/sys:ro \
   -v /:/rootfs:ro \
   prom/node-exporter \
   --path.procfs /host/proc \
   --path.sysfs /host/sys \
   --collector.filesystem.mount-points-exclude '^/(sys|proc|dev|host|etc)($|/)'
# Start cAdvisor detached and privileged, publishing port 8080; all host
# paths are mounted read-only.
# NOTE(review): the google/cadvisor image name may be outdated - confirm
# against the cAdvisor project's current image location.
docker run -d --privileged \
  --name cadvisor \
  -p 8080:8080 \
  -v /:/rootfs:ro \
  -v /var/run:/var/run:ro \
  -v /sys:/sys:ro \
  -v /var/lib/docker/:/var/lib/docker:ro \
  -v /dev/disk/:/dev/disk:ro \
  -v /cgroup:/cgroup:ro \
  google/cadvisor
# access_token=<DingTalk robot token>
docker run \
  --detach \
  --name dingtalk \
  --restart always \
  --publish 8060:8060 \
  timonwong/prometheus-webhook-dingtalk:v0.3.0 \
  --ding.profile='webhook1=https://oapi.dingtalk.com/robot/send?access_token=xxxxx'
# Start Prometheus detached on port 9090, mounting the main config and the
# rules file from /tmp into /etc/prometheus.
docker run \
  --detach \
  --name prometheus \
  --publish 9090:9090 \
  --volume /tmp/prometheus.yml:/etc/prometheus/prometheus.yml \
  --volume /tmp/rules.yml:/etc/prometheus/rules.yml \
  prom/prometheus:latest
# Start Grafana detached on port 3000, restarting automatically. The admin
# password and the plugin list are passed through environment variables.
docker run --detach \
  --name grafana \
  --restart always \
  --publish 3000:3000 \
  --env 'GF_SECURITY_ADMIN_PASSWORD=admin' \
  --env 'GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,raintank-worldping-app,grafana-piechart-panel' \
  grafana/grafana
添加并设置钉钉机器人
image.png
# Manually drive up the system's CPU usage.
cat /dev/zero > /dev/null
等待一会儿，钉钉群会收到告警提示
image.png
http://101.43.104.115:9090/alerts
http://101.43.104.115:9090/targets
http://101.43.104.115:9090/rules
image.png

访问 Grafana，账号：admin，密码：admin
http://101.43.104.115:3000/admin/users?search=open

image
image.png
image
image
image
image
image

导入仪表盘模板，ID：8919

image.png
上一篇下一篇

猜你喜欢

热点阅读