# Listing metadata from the file viewer this was exported from: 56 lines, 1.8 KiB, YAML
---
# PrometheusRule declaring availability and latency alerts for the Kubernetes
# control-plane components (API server, etcd, scheduler, controller manager).
#
# The four "*Down" alerts share one pattern:
#   absent(up{job="<job>"} == 1)
# `up{...} == 1` keeps only the targets that are currently scraped
# successfully; `absent()` yields a result only when that set is empty, i.e.
# when no target of the job is up (or the job has no targets at all).
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-cluster-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably the label matched by the Prometheus
    # `ruleSelector` of the "monitoring" release; confirm against the
    # Prometheus custom resource.
    release: monitoring
spec:
  groups:
    # NOTE(review): the group is named "critical" but also carries one
    # `severity: warning` rule (etcdHighCommitDurations).
    - name: cluster.critical
      # Evaluation interval for every rule in this group.
      interval: 30s
      rules:
        # No API server target reports up == 1.
        - alert: KubeAPIServerDown
          expr: absent(up{job="apiserver"} == 1)
          # The condition must hold for 5 minutes before the alert fires.
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "API Server inaccessible"
            description: "L'API server ne répond plus depuis 5 minutes."

        # No etcd target reports up == 1.
        - alert: EtcdDown
          expr: absent(up{job="etcd"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "etcd inaccessible"
            description: "etcd ne répond plus."

        # No kube-scheduler target reports up == 1.
        - alert: KubeSchedulerDown
          expr: absent(up{job="kube-scheduler"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Scheduler inaccessible"
            description: "Plus aucun pod ne peut être schedulé."

        # No kube-controller-manager target reports up == 1.
        - alert: KubeControllerManagerDown
          expr: absent(up{job="kube-controller-manager"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Controller Manager inaccessible"
            description: "Les deployments et replicasets ne fonctionnent plus."

        # 99th-percentile etcd backend commit duration, computed from the
        # 5-minute rate of the histogram buckets, stays above 0.25 s (250 ms)
        # for 10 minutes.
        - alert: etcdHighCommitDurations
          expr: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) > 0.25
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "Latence etcd élevée"
            description: "La latence de commit etcd dépasse 250ms."