---
# PrometheusRule (Prometheus Operator CRD) declaring node-level alerts.
# All alerts below belong to the single rule group `nodes.critical`; each
# alert carries its own `severity` label (critical or warning).
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-node-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably this label is what the Prometheus resource's
    # ruleSelector matches on - confirm against the Prometheus spec.
    release: monitoring
spec:
  groups:
    - name: nodes.critical
      rules:
        # Node condition alerts. These read kube_node_status_condition and
        # use the `node` label of that series in their descriptions.

        # The Ready/status="true" series has been 0 for 15 minutes.
        - alert: NodeNotReady
          expr: kube_node_status_condition{condition="Ready",status="true"} == 0
          for: 15m
          labels:
            severity: critical
          annotations:
            summary: "Node NotReady"
            description: "Le node {{ $labels.node }} est NotReady depuis 15 minutes."

        # The MemoryPressure/status="true" series has been 1 for 2 minutes.
        - alert: NodeMemoryPressure
          expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == 1
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Node en MemoryPressure"
            description: "Le node {{ $labels.node }} est en MemoryPressure."

        # The DiskPressure/status="true" series has been 1 for 2 minutes.
        - alert: NodeDiskPressure
          expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Node en DiskPressure"
            description: "Le node {{ $labels.node }} est en DiskPressure."

        # Resource usage alerts. These read node_cpu_seconds_total and
        # node_memory_* series.
        # NOTE(review): such series are usually keyed by `instance` and only
        # carry a `node` label when scrape relabeling adds one - confirm for
        # this cluster. The rules below work in both setups.

        # Non-idle CPU share (5m rate, averaged over all CPUs of a machine)
        # above 85% for 10 minutes. Grouping by (instance, node) keeps one
        # result per machine even when the `node` label is absent; grouping
        # by `node` alone would then collapse every machine into a single
        # cluster-wide average.
        - alert: NodeHighCPU
          expr: 100 - (avg by (instance, node) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "CPU node élevé"
            # Falls back to `instance` when the series has no `node` label.
            description: "CPU du node {{ or $labels.node $labels.instance }} > 85%."

        # Used memory (1 - MemAvailable / MemTotal) above 90% for 5 minutes.
        - alert: NodeHighMemory
          expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 > 90
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "RAM node élevée"
            # Falls back to `instance` when the series has no `node` label.
            description: "RAM du node {{ or $labels.node $labels.instance }} > 90%."