feat: initial alert rules
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
---
# PrometheusRule declaring one rule group ("network.critical") with two
# critical-severity alerts: CoreDNS availability and API server p99 latency.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: k8s-network-rules
  namespace: monitoring
  labels:
    # NOTE(review): presumably matched by the Prometheus ruleSelector of the
    # "monitoring" release -- confirm against the Prometheus resource.
    release: monitoring
spec:
  groups:
    - name: network.critical
      rules:
        # Fires when no `up` series for job "coredns" has the value 1,
        # and that condition has held for 5 minutes. Because of absent(),
        # this also covers the case where the job has no targets at all.
        - alert: CoreDNSDown
          expr: absent(up{job="coredns"} == 1)
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "CoreDNS inaccessible"
            description: "CoreDNS est indisponible, la résolution DNS interne est cassée."

        # Fires when the 99th-percentile request duration, computed from the
        # 5-minute rate of the request-duration histogram buckets, stays above
        # 1 (seconds, per the metric name) for 5 minutes. WATCH and LIST verbs
        # are excluded by the selector; all remaining series are summed
        # together by `le`, so the quantile is a single global value.
        # NOTE(review): the selector has no `job` matcher, so any target
        # exporting this metric contributes -- confirm that is intended.
        - alert: KubeAPIServerLatencyHigh
          expr: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!~"WATCH|LIST"}[5m])) by (le)) > 1
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Latence API Server élevée"
            description: "La latence p99 de l'API server dépasse 1 seconde."
|
||||
Reference in New Issue
Block a user