Files
alerts-rules/rules/nodes.yaml
T
2026-06-15 17:17:06 +02:00

55 lines
1.9 KiB
YAML

---
# PrometheusRule holding node-level alerts.
# The first three alerts evaluate kube_node_status_condition; the last two
# evaluate node_cpu_seconds_total and node_memory_* series.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-node-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably the label the Prometheus ruleSelector matches
    # on; confirm against the Prometheus resource.
    release: monitoring
spec:
  groups:
    # NOTE(review): despite its name, this group also contains the two
    # warning-severity alerts (NodeHighCPU, NodeHighMemory). The name is kept
    # unchanged so nothing that refers to it breaks.
    - name: nodes.critical
      rules:
        # Fires when the Ready condition has been non-true for 15 minutes.
        - alert: NodeNotReady
          expr: kube_node_status_condition{condition="Ready",status="true"} == 0
          for: 15m
          labels:
            severity: critical
          annotations:
            summary: "Node NotReady"
            description: "Le node {{ $labels.node }} est NotReady depuis 15 minutes."

        # Fires when the MemoryPressure condition has been true for 2 minutes.
        - alert: NodeMemoryPressure
          expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == 1
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Node en MemoryPressure"
            description: "Le node {{ $labels.node }} est en MemoryPressure."

        # Fires when the DiskPressure condition has been true for 2 minutes.
        - alert: NodeDiskPressure
          expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Node en DiskPressure"
            description: "Le node {{ $labels.node }} est en DiskPressure."

        # CPU usage = 100 - idle percentage, averaged over all CPUs of a host
        # on a 5-minute rate window; fires above 85% for 10 minutes.
        # Grouping by both `node` and `instance` keeps one series per host even
        # when the scraped series carry no `node` label; grouping by `node`
        # alone would then merge every host into one cluster-wide average.
        # NOTE(review): confirm which of the two labels your scrape config
        # attaches to node_cpu_seconds_total.
        - alert: NodeHighCPU
          expr: >-
            100 - (avg by (node, instance)
            (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "CPU node élevé"
            # `or` yields the first non-empty value, so the text is unchanged
            # when `node` is present and falls back to `instance` otherwise.
            description: "CPU du node {{ or $labels.node $labels.instance }} > 85%."

        # Memory usage = share of MemTotal that is not MemAvailable; fires
        # above 90% for 5 minutes.
        - alert: NodeHighMemory
          expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 > 90
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "RAM node élevée"
            # Same fallback as NodeHighCPU: use `instance` when the series has
            # no `node` label.
            description: "RAM du node {{ or $labels.node $labels.instance }} > 90%."