This commit is contained in:
2025-08-12 18:51:44 -03:00
parent 2dd0edbd49
commit 32a09ed129
13 changed files with 710 additions and 21 deletions

View File

@@ -0,0 +1,65 @@
# Prometheus alerting rules for the web-server stack: Caddy availability,
# HTTP error rate / latency / in-flight load, CrowdSec availability and
# TLS certificate expiry.
groups:
  - name: web-server
    rules:
      # Scrape target for job "caddy" has reported up == 0 for 1 minute.
      - alert: CaddyDown
        expr: up{job="caddy"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Caddy web server is down"
          description: "Caddy web server has been down for more than 1 minute."

      # Share of 5xx responses among all responses over the last 5 minutes.
      # Both sides are wrapped in sum(): without aggregation PromQL matches
      # the two vectors one-to-one on identical label sets, so every 5xx
      # series would be divided by itself and the ratio would always be 1.
      # NOTE(review): confirm the scraped Caddy build really exports
      # `caddy_http_responses_total` with a `status` label; if the metric is
      # absent this rule silently never fires.
      - alert: HighHttpErrorRate
        expr: >-
          sum(rate(caddy_http_responses_total{status=~"5.."}[5m]))
          /
          sum(rate(caddy_http_responses_total[5m]))
          > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High HTTP 5xx error rate"
          description: "HTTP 5xx error rate is above 10% for more than 5 minutes."

      # 95th percentile of request duration from the histogram buckets.
      # There is no aggregation here, so the quantile is evaluated (and the
      # alert raised) separately for each label combination of the buckets.
      - alert: HighHttpResponseTime
        expr: histogram_quantile(0.95, rate(caddy_http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High HTTP response time"
          description: "95th percentile HTTP response time is above 2 seconds for more than 5 minutes."

      # Scrape target for job "crowdsec" has reported up == 0 for 2 minutes.
      - alert: CrowdSecDown
        expr: up{job="crowdsec"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "CrowdSec is down"
          description: "CrowdSec security engine has been down for more than 2 minutes."

      # In-flight request gauge above 100 for 5 minutes; compared per series,
      # not as a total across series.
      - alert: HighActiveConnections
        expr: caddy_http_requests_in_flight > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High number of active HTTP connections"
          description: "Number of active HTTP connections is above 100 for more than 5 minutes."

      # Remaining certificate lifetime in days (seconds / 86400) below 30.
      # The value is rendered with one decimal instead of the raw float.
      # NOTE(review): presumably `caddy_tls_cert_not_after` is a Unix
      # timestamp carrying a `san` label - confirm which exporter provides it.
      - alert: CertificateExpiringSoon
        expr: (caddy_tls_cert_not_after - time()) / 86400 < 30
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "TLS certificate expiring soon"
          description: 'TLS certificate for {{ $labels.san }} expires in {{ printf "%.1f" $value }} days.'

      # Certificate not-after timestamp is already in the past.
      - alert: CertificateExpired
        expr: caddy_tls_cert_not_after < time()
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "TLS certificate expired"
          description: "TLS certificate for {{ $labels.san }} has expired."