feat: initial alert rules
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
---
# PrometheusRule declaring node-level alerts in a single group, `nodes.critical`.
# Three alerts read node conditions from `kube_node_status_condition` (keyed by
# the `node` label); two read node_exporter metrics (keyed by `instance`).
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-node-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably matched by the Prometheus `ruleSelector` of the
    # "monitoring" release - confirm against the Prometheus resource.
    release: monitoring
spec:
  groups:
    - name: nodes.critical
      rules:
        # Fires when the Ready condition has not been "true" for 15 minutes
        # (the status="true" series is 0 whenever the node is false/unknown).
        - alert: NodeNotReady
          expr: kube_node_status_condition{condition="Ready",status="true"} == 0
          for: 15m
          labels:
            severity: critical
          annotations:
            summary: "Node NotReady"
            description: "Le node {{ $labels.node }} est NotReady depuis 15 minutes."

        # Fires when the node has reported MemoryPressure for 2 minutes.
        - alert: NodeMemoryPressure
          expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == 1
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Node en MemoryPressure"
            description: "Le node {{ $labels.node }} est en MemoryPressure."

        # Fires when the node has reported DiskPressure for 2 minutes.
        - alert: NodeDiskPressure
          expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Node en DiskPressure"
            description: "Le node {{ $labels.node }} est en DiskPressure."

        # CPU busy percentage = 100 - idle percentage, averaged over all cores
        # of one scrape target. Grouped by `instance` because node_exporter
        # series carry no `node` label unless custom relabeling adds one;
        # grouping by a missing label would average the whole cluster into a
        # single unlabeled series.
        - alert: NodeHighCPU
          expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "CPU node élevé"
            description: "CPU du node {{ $labels.instance }} > 85%."

        # Used-memory percentage derived from MemAvailable / MemTotal.
        # The series is identified by `instance` (node_exporter target), so the
        # description references that label rather than `node`.
        - alert: NodeHighMemory
          expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 > 90
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "RAM node élevée"
            description: "RAM du node {{ $labels.instance }} > 90%."
|
||||
Reference in New Issue
Block a user