mirror of
https://github.com/elAgala/server-initializer.git
synced 2026-02-14 13:16:17 +00:00
save
This commit is contained in:
65
templates/monitoring/alerts/web-server.yml
Normal file
65
templates/monitoring/alerts/web-server.yml
Normal file
@@ -0,0 +1,65 @@
|
||||
# Prometheus alerting rules for the web-facing stack: the Caddy web server
# (availability, error rate, latency, in-flight requests, TLS certificates)
# and the CrowdSec scrape target.
groups:
  - name: web-server
    rules:
      # The scrape target for job "caddy" has reported down for 1 minute.
      - alert: CaddyDown
        expr: up{job="caddy"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Caddy web server is down"
          description: "Caddy web server has been down for more than 1 minute."

      # Share of 5xx responses among all responses, per scrape target.
      # Both sides are aggregated before dividing: without aggregation the
      # division matches each 5xx series against itself (identical label
      # sets), so the ratio is always 1 and the alert fires on any 5xx traffic.
      # NOTE(review): confirm that `caddy_http_responses_total` and its
      # `status` label are actually exported by the Caddy build in use.
      - alert: HighHttpErrorRate
        expr: >-
          sum by (instance) (rate(caddy_http_responses_total{status=~"5.."}[5m]))
          /
          sum by (instance) (rate(caddy_http_responses_total[5m]))
          > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High HTTP 5xx error rate"
          description: "HTTP 5xx error rate is above 10% for more than 5 minutes."

      # 95th percentile of the request-duration histogram stays above 2 seconds.
      - alert: HighHttpResponseTime
        expr: histogram_quantile(0.95, rate(caddy_http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High HTTP response time"
          description: "95th percentile HTTP response time is above 2 seconds for more than 5 minutes."

      # The scrape target for job "crowdsec" has reported down for 2 minutes.
      - alert: CrowdSecDown
        expr: up{job="crowdsec"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "CrowdSec is down"
          description: "CrowdSec security engine has been down for more than 2 minutes."

      # More than 100 requests in flight on a series for 5 minutes.
      - alert: HighActiveConnections
        expr: caddy_http_requests_in_flight > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High number of active HTTP connections"
          description: "Number of active HTTP connections is above 100 for more than 5 minutes."

      # Remaining certificate lifetime, converted from seconds to days
      # (86400 seconds per day), is below 30.
      # NOTE(review): confirm that `caddy_tls_cert_not_after` and its `san`
      # label are exported in this deployment.
      - alert: CertificateExpiringSoon
        expr: (caddy_tls_cert_not_after - time()) / 86400 < 30
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "TLS certificate expiring soon"
          # `humanize` trims the raw float so the message reads cleanly.
          description: "TLS certificate for {{ $labels.san }} expires in {{ $value | humanize }} days."

      # The certificate's not-after timestamp is already in the past.
      - alert: CertificateExpired
        expr: caddy_tls_cert_not_after < time()
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "TLS certificate expired"
          description: "TLS certificate for {{ $labels.san }} has expired."
|
||||
Reference in New Issue
Block a user