---
# PrometheusRule declaring control-plane alerts for the cluster.
# Every "*Down" alert uses the same shape: `absent(up{job="..."} == 1)`
# yields a result only when no `up` series for that job currently equals 1,
# and `for: 5m` requires that condition to hold for 5 minutes before firing.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-cluster-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably matched by the Prometheus ruleSelector so the
    # operator loads this rule - confirm against the Prometheus resource.
    release: monitoring
spec:
  groups:
    - name: cluster.critical
      # Evaluation interval for every rule in this group.
      interval: 30s
      rules:
        # No apiserver target reports up == 1.
        - alert: KubeAPIServerDown
          expr: absent(up{job="apiserver"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "API Server inaccessible"
            description: "L'API server ne répond plus depuis 5 minutes."

        # No etcd target reports up == 1.
        - alert: EtcdDown
          expr: absent(up{job="etcd"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "etcd inaccessible"
            description: "etcd ne répond plus."

        # No kube-scheduler target reports up == 1.
        - alert: KubeSchedulerDown
          expr: absent(up{job="kube-scheduler"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Scheduler inaccessible"
            description: "Plus aucun pod ne peut être schedulé."

        # No kube-controller-manager target reports up == 1.
        - alert: KubeControllerManagerDown
          expr: absent(up{job="kube-controller-manager"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Controller Manager inaccessible"
            description: "Les deployments et replicasets ne fonctionnent plus."

        # 99th-percentile etcd backend commit duration, computed from the
        # 5-minute rate of the histogram buckets, above 0.25 s for 10 minutes.
        # The folded scalar (>-) joins the lines below with single spaces, so
        # the resulting expression is a single-line PromQL string.
        - alert: etcdHighCommitDurations
          expr: >-
            histogram_quantile(0.99,
            rate(etcd_disk_backend_commit_duration_seconds_bucket[5m]))
            > 0.25
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: "Latence etcd élevée"
            description: "La latence de commit etcd dépasse 250ms."