使用prometheus监控非k8s集群内机器

背景

有几台 MySQL 机器是单独部署的,不在 k8s 集群中。现在需要把这几台机器加入 Prometheus 监控:只需要监控节点(主机)状态信息,不需要慢 SQL 之类的 MySQL 层面监控,所以只需要在这些 MySQL 机器上部署 node-exporter 即可。

步骤

安装node-exporter

# Download, unpack and start node_exporter on the MySQL host.
# The release version is defined once so an upgrade only touches one line.
NODE_EXPORTER_VERSION=1.5.0
NODE_EXPORTER_DIR="node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64"

wget "https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${NODE_EXPORTER_DIR}.tar.gz"
tar xvf "${NODE_EXPORTER_DIR}.tar.gz"
cd "${NODE_EXPORTER_DIR}"

# Run detached from the terminal and discard all output.
# `>/dev/null 2>&1` is used instead of the bash-only `&>/dev/null`: in a POSIX
# sh, `cmd &>/dev/null &` is parsed as "background cmd, then truncate
# /dev/null", so nothing would actually be redirected.
# NOTE(review): a process started with nohup does not survive a reboot;
# consider a systemd unit for long-term use.
nohup ./node_exporter >/dev/null 2>&1 &

查看metrics

file

配置svc

# Service that fronts node-exporter running on the MySQL hosts outside the
# cluster. Its backends come from a manually created Endpoints object with the
# same name and namespace.
#
# `spec.selector` is deliberately omitted: when a Service has a selector, the
# endpoints controller owns the same-named Endpoints object and replaces its
# addresses with the Pods matching the selector. No Pod carries this label, so
# the hand-written external IP would be wiped.
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: "true"
  labels:
    # Matched by the ServiceMonitor's selector.matchLabels.
    app: prometheus-mysql-node-exporter
  name: prometheus-mysql-node-exporter
  namespace: monitoring
spec:
  ports:
  # The port name must equal the port name in the Endpoints object and the
  # `port` referenced by the ServiceMonitor endpoint.
  - name: mysql-exporter
    port: 9100
    protocol: TCP
    targetPort: 9100
  sessionAffinity: None
  type: ClusterIP

配置ep

指定ip和端口

# Hand-maintained Endpoints object: it shares its name and namespace with the
# prometheus-mysql-node-exporter Service and lists the external host address
# on which node-exporter is reachable.
kind: Endpoints
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus-mysql-node-exporter
  labels:
    app: prometheus-mysql-node-exporter
subsets:
  - ports:
      # Same port name as the Service port.
      - name: mysql-exporter
        protocol: TCP
        port: 9100
    addresses:
      # Address of the MySQL host; nodeName is the value the ServiceMonitor
      # relabeling copies into the `instance` label.
      - ip: 192.168.1.1
        nodeName: mysql

配置ServiceMonitor

# ServiceMonitor that makes Prometheus scrape the external MySQL hosts through
# the prometheus-mysql-node-exporter Service.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: prometheus-mysql-node-exporter
  name: mysql-node-exporter
  namespace: monitoring
spec:
  # jobLabel names a label KEY on the selected Service; that label's value
  # becomes the `job` label. The Service carries
  # `app: prometheus-mysql-node-exporter`, so job="prometheus-mysql-node-exporter",
  # which is the value the dashboard queries and alert rules filter on.
  # (The previous value `mysql-node-exporter` is not a label on the Service, so
  # the job name only worked through the fallback to the Service name.)
  jobLabel: app
  endpoints:
    # Refers to the Service/Endpoints port by name.
    - port: mysql-exporter
      interval: 15s
      honorLabels: true
      scheme: http
      relabelings:
      # Use the Endpoints address' nodeName as the `instance` label instead of
      # the default ip:port.
      - action: replace
        regex: '(.*)'
        replacement: '$1'
        sourceLabels:
        - __meta_kubernetes_endpoint_node_name
        targetLabel: instance
  selector:
    matchLabels:
      app: prometheus-mysql-node-exporter

查看target

file

配置grafana

导入json

{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": 36,
  "iteration": 1681200229063,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 11,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null as zero",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "(1 - avg(irate(node_cpu_seconds_total{mode=\"idle\", instance!~\".*kube.*\"}[1m])) by (instance))",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "CPU Utilisation",
      "tooltip": {
        "shared": true,
        "sort": 2,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "percentunit",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 3,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sideWidth": null,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "repeat": null,
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "node_load1{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "1m load average",
          "refId": "A"
        },
        {
          "expr": "node_load5{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "5m load average",
          "refId": "B"
        },
        {
          "expr": "node_load15{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "15m load average",
          "refId": "C"
        },
        {
          "expr": "count(node_cpu_seconds_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", mode=\"idle\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "logical cores",
          "refId": "D"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Load Average",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "cacheTimeout": null,
      "colorBackground": false,
      "colorValue": false,
      "colors": [
        "rgba(50, 172, 45, 0.97)",
        "rgba(237, 129, 40, 0.89)",
        "rgba(245, 54, 54, 0.9)"
      ],
      "datasource": "$datasource",
      "format": "percent",
      "gauge": {
        "maxValue": 100,
        "minValue": 0,
        "show": true,
        "thresholdLabels": false,
        "thresholdMarkers": true
      },
      "gridPos": {
        "h": 7,
        "w": 6,
        "x": 18,
        "y": 7
      },
      "id": 5,
      "interval": null,
      "links": [],
      "mappingType": 1,
      "mappingTypes": [
        {
          "name": "value to text",
          "value": 1
        },
        {
          "name": "range to text",
          "value": 2
        }
      ],
      "maxDataPoints": 100,
      "nullPointMode": "connected",
      "nullText": null,
      "options": {},
      "postfix": "",
      "postfixFontSize": "50%",
      "prefix": "",
      "prefixFontSize": "50%",
      "rangeMaps": [
        {
          "from": "null",
          "text": "N/A",
          "to": "null"
        }
      ],
      "sparkline": {
        "fillColor": "rgba(31, 118, 189, 0.18)",
        "full": false,
        "lineColor": "rgb(31, 120, 193)",
        "show": false
      },
      "tableColumn": "",
      "targets": [
        {
          "expr": "100 -\n(\n  avg(node_memory_MemAvailable_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"})\n/\n  avg(node_memory_MemTotal_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "thresholds": "80, 90",
      "title": "Memory Usage",
      "type": "singlestat",
      "valueFontSize": "80%",
      "valueMaps": [
        {
          "op": "=",
          "text": "N/A",
          "value": "null"
        }
      ],
      "valueName": "current"
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 18,
        "x": 0,
        "y": 8
      },
      "id": 4,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sideWidth": null,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "repeat": null,
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": true,
      "steppedLine": false,
      "targets": [
        {
          "expr": "(\n  node_memory_MemTotal_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n-\n  node_memory_MemFree_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n-\n  node_memory_Buffers_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n-\n  node_memory_Cached_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n)\n",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "memory used",
          "refId": "A"
        },
        {
          "expr": "node_memory_Buffers_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "memory buffers",
          "refId": "B"
        },
        {
          "expr": "node_memory_Cached_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "memory cached",
          "refId": "C"
        },
        {
          "expr": "node_memory_MemFree_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "memory free",
          "refId": "D"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Memory Usage",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        },
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 15
      },
      "id": 6,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sideWidth": null,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "repeat": null,
      "seriesOverrides": [
        {
          "alias": "/ read| written/",
          "yaxis": 1
        },
        {
          "alias": "/ io time/",
          "yaxis": 2
        }
      ],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(node_disk_read_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
          "format": "time_series",
          "interval": "1m",
          "intervalFactor": 2,
          "legendFormat": "{{device}} read",
          "refId": "A"
        },
        {
          "expr": "rate(node_disk_written_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
          "format": "time_series",
          "interval": "1m",
          "intervalFactor": 2,
          "legendFormat": "{{device}} written",
          "refId": "B"
        },
        {
          "expr": "rate(node_disk_io_time_seconds_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
          "format": "time_series",
          "interval": "1m",
          "intervalFactor": 2,
          "legendFormat": "{{device}} io time",
          "refId": "C"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Disk I/O",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "s",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 15
      },
      "id": 7,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sideWidth": null,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null as zero",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "repeat": null,
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": true,
      "steppedLine": false,
      "targets": [
        {
          "expr": "1 -\r\n(\r\n  max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"prometheus-mysql-node-exporter\", fstype!=\"\", instance=\"$instance\"})\r\n/\r\n  max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"prometheus-mysql-node-exporter\", fstype!=\"\", instance=\"$instance\"})\r\n)",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "{{device}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Disk Space Usage",
      "tooltip": {
        "shared": true,
        "sort": 2,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "percentunit",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        },
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": false
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 22
      },
      "id": 8,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sideWidth": null,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "repeat": null,
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(node_network_receive_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
          "format": "time_series",
          "interval": "1m",
          "intervalFactor": 2,
          "legendFormat": "{{device}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Network Received",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        },
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 22
      },
      "id": 9,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sideWidth": null,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "repeat": null,
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "rate(node_network_transmit_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
          "format": "time_series",
          "interval": "1m",
          "intervalFactor": 2,
          "legendFormat": "{{device}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Network Transmitted",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        },
        {
          "format": "bytes",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": 0,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "refresh": false,
  "schemaVersion": 20,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {
          "text": "prometheus",
          "value": "prometheus"
        },
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": false,
        "name": "datasource",
        "options": [],
        "query": "prometheus",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "allValue": null,
        "current": {
          "text": "mdsn01",
          "value": "mdsn01"
        },
        "datasource": "$datasource",
        "definition": "",
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": false,
        "name": "instance",
        "options": [],
        "query": "label_values(node_exporter_build_info{job=\"prometheus-mysql-node-exporter\"}, instance)",
        "refresh": 2,
        "regex": "^(?:(?!9100).)*$",
        "skipUrlSync": false,
        "sort": 0,
        "tagValuesQuery": "",
        "tags": [],
        "tagsQuery": "",
        "type": "query",
        "useTags": false
      }
    ]
  },
  "time": {
    "from": "now-5m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ],
    "time_options": [
      "5m",
      "15m",
      "1h",
      "6h",
      "12h",
      "24h",
      "2d",
      "7d",
      "30d"
    ]
  },
  "timezone": "browser",
  "title": "mysql-nodes",
  "uid": "fa49a4706d07a042595b664c87fb33eb",
  "version": 10
}

数据可以正常显示。

file

注意

如果 Grafana 中还有 k8s 集群内其他 node-exporter 的实例,可以在 instance 变量上使用正则将它们过滤掉。

^(?:(?!9100).)*$
# /.*:9100$/ (只匹配以 :9100 结尾的实例)

file

CPU 使用率面板也可以直接在 PromQL 中排除 k8s 集群的节点。

(1 - avg(irate(node_cpu_seconds_total{mode="idle", instance!~".*kube.*"}[1m])) by (instance))

配置告警

导入PrometheusRule即可。

# Alerting rules for the standalone MySQL hosts scraped under
# job="prometheus-mysql-node-exporter".
# Every expression only carries the `instance` label, so annotations reference
# {{ $labels.instance }}; `node` is not present on these series and would
# render as an empty string.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # presumably matched by the Prometheus CR's ruleSelector - verify
    prometheus: k8s
    role: alert-rules
  name: mysql-node-alerting
  namespace: monitoring
spec:
  groups:
  - name: mysql-node-alerting
    rules:
    # Overall filesystem usage (all filesystems with a non-empty fstype summed
    # per instance) above 85%.
    - alert: NodeDiskCapacityLow
      annotations:
        message: Node {{ $labels.instance }} remain disk capacity is too low.
      expr: >-
        100 - ((sum(node_filesystem_avail_bytes{job="prometheus-mysql-node-exporter", fstype!=""}) by (instance) * 100)
        / (sum(node_filesystem_size_bytes{job="prometheus-mysql-node-exporter", fstype!=""}) by (instance))) > 85
      for: 10m
      labels:
        severity: critical
    # Memory utilisation above 95%; the message now states the threshold the
    # expression actually uses.
    - alert: NodeMemUsedHigh
      annotations:
        message: Node {{ $labels.instance }} Memory Utilisation has more than 95%
      expr: '1 - (node_memory_MemAvailable_bytes{job="prometheus-mysql-node-exporter"} / node_memory_MemTotal_bytes{job="prometheus-mysql-node-exporter"}) > 0.95'
      for: 1m
      labels:
        severity: warning
    # NOTE(review): process_cpu_seconds_total normally describes the exporter
    # process itself and has no `mode` or `process_name` label, so this rule
    # probably does not measure host process load - confirm and revise.
    - alert: NodeProcessHigh
      annotations:
        message: Node {{ $labels.instance }} has too many running processes.
      expr: sum by (process_name,instance) (rate(process_cpu_seconds_total{job="prometheus-mysql-node-exporter", mode!="idle"}[1m])) * 100 > 90
      for: 10m
      labels:
        severity: critical
    # Allocated file descriptors above 90% of a fixed limit of 64000.
    - alert: NodeFileDescriptorsHigh
      annotations:
        message: Node {{ $labels.instance }} has too many file descriptors.
      expr: sum(node_filefd_allocated{job="prometheus-mysql-node-exporter"}) by (instance) / 64000 * 100 > 90
      for: 10m
      labels:
        severity: warning
    # CPU utilisation above 70%. Scoped by job like the other rules in this
    # group, so it only fires for the MySQL hosts.
    - alert: NodeCPUHigh
      annotations:
        message: Node {{ $labels.instance }} CPU Utilisation has more than 70%
      expr: (1 - avg(irate(node_cpu_seconds_total{job="prometheus-mysql-node-exporter", mode="idle"}[1m])) by (instance)) > 0.7
      for: 1m
      labels:
        severity: warning
    # Load average divided by the number of CPUs (one mode="system" series per
    # CPU) above 0.8, for the 1/5/15 minute windows.
    - alert: NodeLoad1High
      annotations:
        message: >-
          Node {{ $labels.instance }} 1min avg load has more than 80% of the
          node load capacity
      expr: >-
        (sum(node_load1{job="prometheus-mysql-node-exporter"}) by (instance))
        / (count(node_cpu_seconds_total{job="prometheus-mysql-node-exporter",mode="system"}) by (instance)) > 0.8
      for: 1m
      labels:
        severity: warning
    - alert: NodeLoad5High
      annotations:
        message: >-
          Node {{ $labels.instance }} 5min avg load has more than 80% of the
          node load capacity
      expr: >-
        (sum(node_load5{job="prometheus-mysql-node-exporter"}) by (instance))
        / (count(node_cpu_seconds_total{job="prometheus-mysql-node-exporter",mode="system"}) by (instance)) > 0.8
      for: 1m
      labels:
        severity: warning
    - alert: NodeLoad15High
      annotations:
        message: >-
          Node {{ $labels.instance }} 15min avg load has more than 80% of the
          node load capacity
      expr: >-
        (sum(node_load15{job="prometheus-mysql-node-exporter"}) by (instance))
        / (count(node_cpu_seconds_total{job="prometheus-mysql-node-exporter",mode="system"}) by (instance)) > 0.8
      for: 1m
      labels:
        severity: warning

下面是内存使用过高的企业微信告警。

file

0 0 投票数
文章评分
订阅评论
提醒
guest

0 评论
内联反馈
查看所有评论

相关文章

开始在上面输入您的搜索词,然后按回车进行搜索。按ESC取消。

返回顶部
0
希望看到您的想法,请您发表评论x