feat: initial alert rules
This commit is contained in:
@@ -0,0 +1,55 @@
|
||||
# PrometheusRule manifest (monitoring.coreos.com/v1) declaring the
# "k8s-pod-rules" rule set in the "monitoring" namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-pod-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably this label is what the Prometheus instance's
    # ruleSelector matches on -- confirm against the Prometheus resource.
    release: monitoring
spec:
  groups:
    # NOTE(review): the group is named "pods.critical", but the rules listed
    # under it carry both `critical` and `warning` severities.
    - name: pods.critical
      rules:
|
||||
# Fires when a container's restart counter has increased within the trailing
# 15-minute window, and that has stayed true for 15 minutes.
- alert: KubePodCrashLooping
  # increase(v[15m]) is rate(v[15m]) scaled by the window length in seconds
  # (15 * 60), i.e. the same quantity the rule compares against zero.
  expr: increase(kube_pod_container_status_restarts_total[15m]) > 0
  for: 15m
  labels:
    severity: critical
  annotations:
    summary: 'Pod en CrashLoop'
    description: 'Le pod {{ $labels.namespace }}/{{ $labels.pod }} crashe en boucle.'
|
||||
|
||||
# Fires when a pod has been reported in a phase other than Running or
# Succeeded for 15 minutes.
# NOTE(review): the expression looks at the pod *phase*, not at the Ready
# condition; a Running pod that fails its readiness probe is presumably not
# covered by this rule -- confirm that this is intended.
- alert: KubePodNotReady
  expr: |-
    sum by (namespace, pod) (
      kube_pod_status_phase{phase!~"Running|Succeeded"}
    ) > 0
  for: 15m
  labels:
    severity: critical
  annotations:
    summary: 'Pod non Ready'
    description: "Le pod {{ $labels.namespace }}/{{ $labels.pod }} n'est pas Ready depuis 15 minutes."
|
||||
|
||||
# Fires when a deployment's spec replica count has differed from its
# available replica count for 15 minutes.
- alert: KubeDeploymentReplicasMismatch
  expr: |-
    kube_deployment_spec_replicas
      !=
    kube_deployment_status_replicas_available
  for: 15m
  labels:
    severity: critical
  annotations:
    summary: 'Replicas manquants'
    description: "Le deployment {{ $labels.namespace }}/{{ $labels.deployment }} n'a pas le bon nombre de replicas."
|
||||
|
||||
# Fires as soon as a container has restarted within the last 10 minutes and
# its last recorded termination reason is OOMKilled.
- alert: KubeContainerOOMKilled
  # kube_pod_container_status_last_terminated_reason is a state gauge, not an
  # event: on its own it would keep this alert firing long after a single OOM
  # kill. Requiring a recent increase of the restart counter limits the alert
  # to fresh kills and lets it resolve once the container has been stable for
  # 10 minutes.
  # `ignoring (reason)` is needed because only the right-hand metric carries
  # the `reason` label.
  expr: |-
    increase(kube_pod_container_status_restarts_total[10m]) > 0
    and ignoring (reason)
    kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1
  # No pending period: notify on the first evaluation that matches.
  for: 0m
  labels:
    severity: warning
  annotations:
    summary: 'Conteneur OOMKilled'
    # The namespace is included because pod names are only unique per
    # namespace.
    description: 'Le conteneur {{ $labels.container }} du pod {{ $labels.namespace }}/{{ $labels.pod }} a été tué par OOMKiller.'
|
||||
|
||||
# Fires when an HPA's current replica count has been equal to its configured
# maximum for 15 minutes.
- alert: KubeHpaMaxedOut
  expr: |-
    kube_horizontalpodautoscaler_status_current_replicas
      ==
    kube_horizontalpodautoscaler_spec_max_replicas
  for: 15m
  labels:
    severity: warning
  annotations:
    summary: 'HPA au maximum'
    description: "L'HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} est à son maximum."
|
||||
Reference in New Issue
Block a user