背景
有几台mysql机器是不在k8s集群中单独部署的,现需要把这几台mysql机器加入到prometheus监控中,只需要监控节点状态信息,不需要监控比如是否有慢SQL等mysql监控。所以只需要在mysql机器上部署node-exporter即可。
步骤
安装node-exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.5.0/node_exporter-1.5.0.linux-amd64.tar.gz
tar xvf node_exporter-1.5.0.linux-amd64.tar.gz
cd node_exporter-1.5.0.linux-amd64
nohup ./node_exporter &>/dev/null &
查看metrics
配置svc
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: "true"
labels:
app: prometheus-mysql-node-exporter
name: prometheus-mysql-node-exporter
namespace: monitoring
spec:
ports:
- name: mysql-exporter
port: 9100
protocol: TCP
targetPort: 9100
selector:
app: prometheus-mysql-node-exporter
sessionAffinity: None
type: ClusterIP
配置ep
指定ip和端口
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: prometheus-mysql-node-exporter
name: prometheus-mysql-node-exporter
namespace: monitoring
subsets:
- addresses:
- ip: 192.168.1.1
nodeName: mysql
ports:
- name: mysql-exporter
port: 9100
protocol: TCP
配置ServiceMonitor
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: prometheus-mysql-node-exporter
name: mysql-node-exporter
namespace: monitoring
spec:
jobLabel: mysql-node-exporter
endpoints:
- port: mysql-exporter
interval: 15s
honorLabels: true
scheme: http
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_endpoint_node_name
targetLabel: instance
selector:
matchLabels:
app: prometheus-mysql-node-exporter
查看target
配置grafana
导入json
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 36,
"iteration": 1681200229063,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 11,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "(1 - avg(irate(node_cpu_seconds_total{mode=\"idle\", instance!~\".*kube.*\"}[1m])) by (instance))",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU Utilisation",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_load1{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "1m load average",
"refId": "A"
},
{
"expr": "node_load5{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5m load average",
"refId": "B"
},
{
"expr": "node_load15{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "15m load average",
"refId": "C"
},
{
"expr": "count(node_cpu_seconds_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", mode=\"idle\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "logical cores",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Load Average",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "$datasource",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 7
},
"id": 5,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "80, 90",
"title": "Memory Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 18,
"x": 0,
"y": 8
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "(\n node_memory_MemTotal_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory used",
"refId": "A"
},
{
"expr": "node_memory_Buffers_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory buffers",
"refId": "B"
},
{
"expr": "node_memory_Cached_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory cached",
"refId": "C"
},
{
"expr": "node_memory_MemFree_bytes{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory free",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 15
},
"id": 6,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "/ read| written/",
"yaxis": 1
},
{
"alias": "/ io time/",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_read_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{device}} read",
"refId": "A"
},
{
"expr": "rate(node_disk_written_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{device}} written",
"refId": "B"
},
{
"expr": "rate(node_disk_io_time_seconds_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{device}} io time",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Disk I/O",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 15
},
"id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "1 -\r\n(\r\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"prometheus-mysql-node-exporter\", fstype!=\"\", instance=\"$instance\"})\r\n/\r\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"prometheus-mysql-node-exporter\", fstype!=\"\", instance=\"$instance\"})\r\n)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Disk Space Usage",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 22
},
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_network_receive_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Network Received",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 22
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_network_transmit_bytes_total{job=\"prometheus-mysql-node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Network Transmitted",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": false,
"schemaVersion": 20,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"text": "prometheus",
"value": "prometheus"
},
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": null,
"current": {
"text": "mdsn01",
"value": "mdsn01"
},
"datasource": "$datasource",
"definition": "",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "instance",
"options": [],
"query": "label_values(node_exporter_build_info{job=\"prometheus-mysql-node-exporter\"}, instance)",
"refresh": 2,
"regex": "^(?:(?!9100).)*$",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "mysql-nodes",
"uid": "fa49a4706d07a042595b664c87fb33eb",
"version": 10
}
数据可以正常显示。
注意
grafana中如果还有其他k8s集群中的node-exporter,可以使用正则来删除。
^(?:(?!9100).)*$
# /.*:9100$/ (只过滤还有:9100的)
还有CPU使用率也可以用promql来删除。
(1 - avg(irate(node_cpu_seconds_total{mode="idle", instance!~".*kube.*"}[1m])) by (instance))
配置告警
导入PrometheusRule即可。
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
prometheus: k8s
role: alert-rules
name: mysql-node-alerting
namespace: monitoring
spec:
groups:
- name: mysql-node-alerting
rules:
- alert: NodeDiskCapacityLow
annotations:
message: Node {{ $labels.node }} remain disk capacity is too low.
expr: 100 - ((sum(node_filesystem_avail_bytes{job="prometheus-mysql-node-exporter", fstype!=""}) by (instance) * 100) / (sum(node_filesystem_size_bytes{job="prometheus-mysql-node-exporter", fstype!=""}) by (instance))) > 85
for: 10m
labels:
severity: critical
- alert: NodeMemUsedHigh
annotations:
message: Node {{ $labels.node }} Memory Utilisation has more then 80%
expr: '1 - (node_memory_MemAvailable_bytes{job="prometheus-mysql-node-exporter"} / node_memory_MemTotal_bytes{job="prometheus-mysql-node-exporter"}) > 0.95'
for: 1m
labels:
severity: warnning
- alert: NodeProcessHigh
annotations:
message: Node {{ $labels.node }} has too many running processes.
expr: sum by (process_name,instance) (rate(process_cpu_seconds_total{job="prometheus-mysql-node-exporter", mode!="idle"}[1m])) * 100 > 90
for: 10m
labels:
severity: critical
- alert: NodeFileDescriptorsHigh
annotations:
message: Node {{ $labels.instance }} has too many file descriptors.
expr: sum(node_filefd_allocated{job="prometheus-mysql-node-exporter"}) by (instance) / 64000 * 100 > 90
for: 10m
labels:
severity: warnning
- alert: NodeCPUHigh
annotations:
message: Node {{ $labels.node }} CPU Utilisation has more then 70%
expr: (1 - avg(irate(node_cpu_seconds_total{mode="idle", instance!~".*kube.*"}[1m])) by (instance)) > 0.7
for: 1m
labels:
severity: warnning
- alert: NodeLoad1High
annotations:
message: Node {{ $labels.instance }} 1min avg load has more then 80% of the
node load capacity
expr: (sum(node_load1{job="prometheus-mysql-node-exporter"}) by (instance)) / (count(node_cpu_seconds_total{job="prometheus-mysql-node-exporter",mode="system"})
by (instance)) > 0.8
for: 1m
labels:
severity: warnning
- alert: NodeLoad5High
annotations:
message: Node {{ $labels.instance }} 5min avg load has more then 80% of the
node load capacity
expr: (sum(node_load5{job="prometheus-mysql-node-exporter"}) by (instance)) / (count(node_cpu_seconds_total{job="prometheus-mysql-node-exporter",mode="system"})
by (instance)) > 0.8
for: 1m
labels:
severity: warnning
- alert: NodeLoad15High
annotations:
message: Node {{ $labels.instance }} 15min avg load has more then 80% of the
node load capacity
expr: (sum(node_load15{job="prometheus-mysql-node-exporter"}) by (instance)) / (count(node_cpu_seconds_total{job="prometheus-mysql-node-exporter",mode="system"})
by (instance)) > 0.8
for: 1m
labels:
severity: warnning
下面是内存使用过高的企业微信告警。