From 130575ffd3b21fc88f66dff756e6966b2255e562 Mon Sep 17 00:00:00 2001 From: elAgala Date: Wed, 9 Jul 2025 01:30:11 -0300 Subject: [PATCH] refactor: rethink monitoring structure | add Loki && Promtail | new Grafana conf --- monitoring/install_prometheus.sh | 22 +++++++- .../caddy/sites-enabled/prometheus.Caddyfile | 45 ++++++++++----- .../monitoring/docker-compose.grafana.yml | 45 +++++---------- templates/monitoring/docker-compose.yml | 56 ++++++++++++++++++- templates/monitoring/loki/loki.yml | 44 +++++++++++++++ templates/monitoring/prometheus.yml | 36 +++++++++--- templates/monitoring/promtail/promtail.yml | 50 +++++++++++++++++ 7 files changed, 236 insertions(+), 62 deletions(-) create mode 100644 templates/monitoring/loki/loki.yml create mode 100644 templates/monitoring/promtail/promtail.yml diff --git a/monitoring/install_prometheus.sh b/monitoring/install_prometheus.sh index 36245a3..5c581cc 100644 --- a/monitoring/install_prometheus.sh +++ b/monitoring/install_prometheus.sh @@ -6,12 +6,28 @@ function install_prometheus() { username="$1" monitoring_dir="/home/$username/monitoring" - echo "[ MONITOR ]: Starting Prometheus setup" + echo "[ MONITOR ]: Starting monitoring setup" mkdir -p "$monitoring_dir" + mkdir -p "$monitoring_dir/loki" + mkdir -p "$monitoring_dir/promtail" + + # Download main monitoring files wget "$REPO_URL/$TEMPLATE_PATH/docker-compose.yml" -O "$monitoring_dir/docker-compose.yml" wget "$REPO_URL/$TEMPLATE_PATH/prometheus.yml" -O "$monitoring_dir/prometheus.yml" + + # Download Loki configuration + wget "$REPO_URL/$TEMPLATE_PATH/loki/loki.yml" -O "$monitoring_dir/loki/loki.yml" + + # Download Promtail configuration + wget "$REPO_URL/$TEMPLATE_PATH/promtail/promtail.yml" -O "$monitoring_dir/promtail/promtail.yml" + cd "$monitoring_dir" - echo "[ MONITOR ]: Prometheus Installed. Starting on docker container" + echo "[ MONITOR ]: Monitoring stack installed. 
Starting containers" sudo docker compose up -d - echo "[ MONITOR ]: Prometheus up & running on port 9090" + echo "[ MONITOR ]: Monitoring stack running:" + echo " - Prometheus: http://localhost:9090 (internal)" + echo " - Prometheus API: https://YOUR_SERVER_IP/prometheus/ (external)" + echo " - Loki: http://localhost:3100 (internal)" + echo " - Node Exporter: http://localhost:9100 (internal)" + echo " - cAdvisor: http://localhost:8080 (internal)" } diff --git a/templates/caddy/full/caddy/sites-enabled/prometheus.Caddyfile b/templates/caddy/full/caddy/sites-enabled/prometheus.Caddyfile index 92ca2a2..6f961e0 100644 --- a/templates/caddy/full/caddy/sites-enabled/prometheus.Caddyfile +++ b/templates/caddy/full/caddy/sites-enabled/prometheus.Caddyfile @@ -1,20 +1,35 @@ -# Prometheus monitoring endpoint -# Bypasses WAF for API endpoints since Prometheus scraping doesn't need WAF protection -prometheus.example.com { - basic_auth { - agala {$PROMETHEUS_PASSWORD} - } +# Prometheus API endpoint for external Grafana access via IP +# Access via: https://YOUR_SERVER_IP/prometheus/ +:443 { + # Basic auth for Prometheus path + handle_path /prometheus/* { + basic_auth { + prometheus {$PROMETHEUS_PASSWORD} + } - @waf { - not path /api/v1/* - } + # Only allow Prometheus API endpoints that Grafana needs + @allowed_endpoints { + path /api/v1/* + path /federate + path /metrics + } - handle @waf { - coraza_waf { - directives ` - Include /etc/caddy/coraza.conf - ` + # Block everything else (UI, admin endpoints, etc.) 
+ handle { + @blocked { + not path /api/v1/* + not path /federate + not path /metrics + } + respond @blocked "API access only" 403 + } + + # Forward only allowed endpoints (no WAF needed for API) + handle @allowed_endpoints { + reverse_proxy * http://prometheus:9090 } } - reverse_proxy * http://prometheus:9090 + + # Default response for other paths + respond "Server monitoring" 200 } \ No newline at end of file diff --git a/templates/monitoring/docker-compose.grafana.yml b/templates/monitoring/docker-compose.grafana.yml index ea96ed2..70ad8b8 100644 --- a/templates/monitoring/docker-compose.grafana.yml +++ b/templates/monitoring/docker-compose.grafana.yml @@ -1,40 +1,21 @@ -services: - # PORT 9090 - prometheus: - image: prom/prometheus:latest - container_name: prometheus - restart: always - volumes: - - ./prometheus.yml:/etc/prometheus/prometheus.yml - networks: - - monitoring_net - - caddy_net +# Grafana add-on for command center servers +# Use: docker compose -f docker-compose.yml -f docker-compose.grafana.yml up -d +services: # PORT 3000 grafana: - image: grafana/grafana:latest + image: grafana/grafana:11.4.1 container_name: grafana - restart: always + restart: unless-stopped + volumes: + - grafana_data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning environment: - - GF_SECURITY_ADMIN_PASSWORD=YOUR_PASSWORD - - GE_SERVER_ROOT_URL=YOUR_URL - depends_on: - - prometheus + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD} + - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource networks: - - monitoring_net - caddy_net + # Note: Grafana connects to external Prometheus instances via HTTPS - # PORT 9100 - node_exporter: - image: prom/node-exporter:latest - container_name: node-exporter - restart: always - networks: - - monitoring_net - -networks: - monitoring_net: - driver: bridge - caddy_net: - external: true - +volumes: + grafana_data: \ No newline at end of file diff --git a/templates/monitoring/docker-compose.yml 
b/templates/monitoring/docker-compose.yml index f9b6543..e11af39 100644 --- a/templates/monitoring/docker-compose.yml +++ b/templates/monitoring/docker-compose.yml @@ -1,20 +1,39 @@ services: - # PORT 9099 + # PORT 9090 (internal only) prometheus: image: prom/prometheus:v3.4.2 container_name: prometheus restart: always volumes: - ./prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus_data:/prometheus networks: - monitoring_net - caddy_net + # No ports exposed - access via Caddy only + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=30d' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--web.enable-lifecycle' # PORT 9100 node_exporter: image: prom/node-exporter:v1.9.1 container_name: node-exporter restart: always + pid: host + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' networks: - monitoring_net @@ -31,10 +50,41 @@ services: restart: unless-stopped networks: - monitoring_net + command: + - '--housekeeping_interval=10s' + - '--docker_only=true' + + # PORT 3100 + loki: + image: grafana/loki:3.4.0 + container_name: loki + volumes: + - ./loki/loki.yml:/etc/loki/local-config.yaml + - loki_data:/loki + restart: unless-stopped + networks: + - monitoring_net + command: -config.file=/etc/loki/local-config.yaml + + # PORT 9080 + promtail: + image: grafana/promtail:3.4.0 + container_name: promtail + volumes: + - ./promtail/promtail.yml:/etc/promtail/config.yml + - ../caddy/logs:/var/log/caddy:ro + - /var/log:/var/log:ro + restart: unless-stopped + networks: + - monitoring_net + command: -config.file=/etc/promtail/config.yml + +volumes: + prometheus_data: + loki_data: networks: monitoring_net: external: 
true caddy_net: - external: true - + external: true \ No newline at end of file diff --git a/templates/monitoring/loki/loki.yml b/templates/monitoring/loki/loki.yml new file mode 100644 index 0000000..1f4da99 --- /dev/null +++ b/templates/monitoring/loki/loki.yml @@ -0,0 +1,44 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + reject_old_samples: true + reject_old_samples_max_age: 168h + retention_period: 168h + +compactor: + working_directory: /loki/boltdb-shipper-compactor + retention_enabled: true + retention_delete_delay: 2h \ No newline at end of file diff --git a/templates/monitoring/prometheus.yml b/templates/monitoring/prometheus.yml index 4a9528a..6d9412c 100644 --- a/templates/monitoring/prometheus.yml +++ b/templates/monitoring/prometheus.yml @@ -1,8 +1,12 @@ global: scrape_interval: 15s + evaluation_interval: 15s + +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" scrape_configs: - - job_name: 'prometheus' static_configs: - targets: ['prometheus:9090'] @@ -10,15 +14,29 @@ scrape_configs: - job_name: 'node' static_configs: - targets: ['node-exporter:9100'] - - - job_name: 'crowdsec' - static_configs: - - targets: ['crowdsec:6060'] - - - job_name: 'caddy' - static_configs: - - targets: ['caddy:2019'] + scrape_interval: 5s + metrics_path: /metrics - job_name: 'cadvisor' static_configs: - targets: ['cadvisor:8080'] + scrape_interval: 5s + metrics_path: /metrics + + - job_name: 'caddy' + static_configs: + - targets: ['caddy:2019'] + scrape_interval: 5s 
+ metrics_path: /metrics + + - job_name: 'crowdsec' + static_configs: + - targets: ['crowdsec:6060'] + scrape_interval: 30s + metrics_path: /metrics + + - job_name: 'loki' + static_configs: + - targets: ['loki:3100'] + scrape_interval: 15s + metrics_path: /metrics \ No newline at end of file diff --git a/templates/monitoring/promtail/promtail.yml b/templates/monitoring/promtail/promtail.yml new file mode 100644 index 0000000..4e2efcb --- /dev/null +++ b/templates/monitoring/promtail/promtail.yml @@ -0,0 +1,50 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + # Caddy access logs + - job_name: caddy + static_configs: + - targets: + - localhost + labels: + job: caddy + __path__: /var/log/caddy/access.log + pipeline_stages: + - json: + expressions: + timestamp: ts + level: level + message: msg + method: request.method + uri: request.uri + status: resp_headers.status + duration: duration + - labels: + method: + status: + - timestamp: + source: timestamp + format: Unix + + # System logs + - job_name: syslog + static_configs: + - targets: + - localhost + labels: + job: syslog + __path__: /var/log/syslog + pipeline_stages: + - regex: + expression: '^(?P<timestamp>\w+\s+\d+\s+\d+:\d+:\d+)\s+(?P<hostname>\w+)\s+(?P<service>\w+).*' + - labels: + hostname: + service: \ No newline at end of file