---
# PrometheusRule (Prometheus Operator CRD) declaring alerting rules for
# pods, deployments and horizontal pod autoscalers.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-pod-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably the label a Prometheus `ruleSelector` matches
    # on to load this object - confirm against the Prometheus resource.
    release: monitoring
spec:
  groups:
    # NOTE(review): the group name says "critical", but two of the rules
    # below carry `severity: warning`.
    - name: pods.critical
      rules:
        # Per-second restart rate over a 15m window, scaled by 60 * 15
        # (= 900 s, i.e. the window length) to approximate the number of
        # restarts in that window. Must stay above zero for 15m to fire.
        - alert: KubePodCrashLooping
          expr: >-
            rate(kube_pod_container_status_restarts_total[15m]) * 60 * 15 > 0
          for: 15m
          labels:
            severity: critical
          annotations:
            summary: "Pod en CrashLoop"
            description: >-
              Le pod {{ $labels.namespace }}/{{ $labels.pod }} crashe en boucle.

        # Sums, per (namespace, pod), the phase series whose `phase` label
        # is anything other than Running or Succeeded.
        # NOTE(review): the expression tests the pod *phase*, not the Ready
        # condition mentioned in the annotation text - confirm intent.
        - alert: KubePodNotReady
          expr: >-
            sum by(namespace, pod)
            (kube_pod_status_phase{phase!~"Running|Succeeded"}) > 0
          for: 15m
          labels:
            severity: critical
          annotations:
            summary: "Pod non Ready"
            description: >-
              Le pod {{ $labels.namespace }}/{{ $labels.pod }} n'est pas Ready
              depuis 15 minutes.

        # Fires when the desired replica count has differed from the
        # available replica count for 15m.
        - alert: KubeDeploymentReplicasMismatch
          expr: >-
            kube_deployment_spec_replicas
            != kube_deployment_status_replicas_available
          for: 15m
          labels:
            severity: critical
          annotations:
            summary: "Replicas manquants"
            description: >-
              Le deployment {{ $labels.namespace }}/{{ $labels.deployment }}
              n'a pas le bon nombre de replicas.

        # `for: 0m` means no pending period: the alert fires on the first
        # evaluation where the expression matches.
        # NOTE(review): this gauge presumably stays at 1 until the container
        # terminates again for another reason, so the alert may keep firing
        # long after the OOM kill - verify against kube-state-metrics docs.
        - alert: KubeContainerOOMKilled
          expr: >-
            kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}
            == 1
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: "Conteneur OOMKilled"
            description: >-
              Le conteneur {{ $labels.container }} du pod {{ $labels.pod }}
              a été tué par OOMKiller.

        # Fires when the HPA's current replica count has equalled its
        # configured maximum for 15m.
        - alert: KubeHpaMaxedOut
          expr: >-
            kube_horizontalpodautoscaler_status_current_replicas
            == kube_horizontalpodautoscaler_spec_max_replicas
          for: 15m
          labels:
            severity: warning
          annotations:
            summary: "HPA au maximum"
            description: >-
              L'HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
              est à son maximum.