Full observability stack with Telegram alerting

Components:
- Prometheus: metrics collection, 90-day retention
- Grafana: dashboards at monitoring.performancewest.net
- Alertmanager: routes alerts to Telegram bot
- node-exporter: OS metrics (CPU, RAM, disk, network)
- cAdvisor: container metrics (CPU, memory, restarts)
- postgres-exporter: PostgreSQL connection/query metrics
- nginx-exporter: request rate, 5xx errors, connections
- blackbox-exporter: HTTP/TCP endpoint probing + SSL cert checks

Alert rules:
- Service down (HTTP probe, TCP port, container missing)
- Container restart loops
- High CPU/memory/disk/load
- PostgreSQL down or high connections
- SSL cert expiring (14d warning, 3d critical)
- Slow HTTP responses, high 5xx rate

Blackbox probes all public endpoints: performancewest.net, api, dev, crm, lists, analytics, minio, crypto, pay

Telegram alerts: critical=1h repeat, warning=6h repeat, auto-resolve notifications

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
91 lines
3.7 KiB
YAML
91 lines
3.7 KiB
YAML
---
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|
# Monitoring Role — Prometheus + Grafana + Alertmanager + Telegram
|
|
# ══════════════════════════════════════════════════════════════════════════════
|
|
|
|
# ── 1. nginx stub_status for nginx-exporter ──────────────────────────
|
|
# Writes a dedicated nginx server block that exposes stub_status at
# /nginx_status on port 80. Access is limited to 127.0.0.1 and
# 172.16.0.0/12; every other client is denied.
# NOTE(review): 172.16.0.0/12 is presumably the Docker network range the
# nginx-exporter container scrapes from — confirm.
- name: Enable nginx stub_status endpoint
  notify: Reload nginx
  ansible.builtin.copy:
    dest: /etc/nginx/conf.d/stub-status.conf
    owner: root
    group: root
    mode: "0644"
    # Literal block scalar: the text below is written to the file verbatim.
    content: |
      server {
          listen 80;
          server_name 127.0.0.1;
          location /nginx_status {
              stub_status;
              allow 127.0.0.1;
              allow 172.16.0.0/12;
              deny all;
          }
      }
|
|
|
|
# ── 2. Deploy nginx config for monitoring.performancewest.net ────────
|
|
# Renders the TLS vhost template into sites-available; it only becomes
# active once a symlink exists in sites-enabled.
# NOTE(review): the relative src presumably resolves into the nginx role's
# templates directory — confirm against the repository layout.
- name: Deploy Grafana nginx config
  notify: Reload nginx
  ansible.builtin.template:
    dest: /etc/nginx/sites-available/pw-monitoring.conf
    src: ../../nginx/templates/pw-monitoring-tls.conf.j2
    mode: "0644"
    owner: root
    group: root
|
|
|
|
# Activates the vhost by symlinking it from sites-available into
# sites-enabled, then asks nginx to reload.
- name: Enable Grafana nginx config
  notify: Reload nginx
  ansible.builtin.file:
    state: link
    # `path` is the link to create, `src` is the file it points at.
    path: /etc/nginx/sites-enabled/pw-monitoring.conf
    src: /etc/nginx/sites-available/pw-monitoring.conf
|
|
|
|
# ── 3. Obtain TLS certificate ────────────────────────────────────────
|
|
# Probes for an existing certificate chain for the monitoring domain; the
# result is stored in `monitoring_cert` for later tasks to test via
# `monitoring_cert.stat.exists`.
- name: Check if monitoring cert exists
  register: monitoring_cert
  ansible.builtin.stat:
    path: "/etc/letsencrypt/live/{{ monitoring_domain }}/fullchain.pem"
|
|
|
|
# Requests a certificate for the monitoring domain with certbot's webroot
# plugin. Runs only when the earlier stat task found no fullchain.pem, and
# notifies nginx to reload.
#
# The command is passed as `argv` rather than a folded string so that each
# templated value (webroot path, domain, e-mail) reaches certbot as exactly
# one argument, even if it contains whitespace or shell-special characters.
- name: Obtain Let's Encrypt cert for monitoring domain
  ansible.builtin.command:
    argv:
      - certbot
      - certonly
      - "--webroot"
      - "-w"
      - "{{ certbot_webroot }}"
      - "-d"
      - "{{ monitoring_domain }}"
      - "--non-interactive"
      - "--agree-tos"
      - "--email"
      - "{{ certbot_email }}"
  when: not monitoring_cert.stat.exists
  notify: Reload nginx
|
|
|
|
# ── 4. Set env vars for Telegram in .env ─────────────────────────────
|
|
# Upserts one KEY=value line per loop item in the project's .env file:
# an existing line starting with "KEY=" is replaced, otherwise the line is
# appended.
#
# The condition is evaluated per item, so a variable is written only when
# its own value is non-empty. This keeps the Grafana admin credentials from
# being skipped just because the Telegram token is unset, and avoids
# writing empty "KEY=" lines.
#
# no_log hides the item values (tokens and passwords) from task output.
- name: Ensure Telegram vars in .env
  ansible.builtin.lineinfile:
    path: "{{ project_dir }}/.env"
    regexp: "^{{ item.key }}="
    line: "{{ item.key }}={{ item.value }}"
    state: present
  loop:
    - key: TELEGRAM_BOT_TOKEN
      value: "{{ telegram_bot_token }}"
    - key: TELEGRAM_CHAT_ID
      value: "{{ telegram_chat_id }}"
    - key: GRAFANA_ADMIN_USER
      value: "{{ grafana_admin_user }}"
    - key: GRAFANA_ADMIN_PASSWORD
      value: "{{ grafana_admin_password }}"
  when: item.value | string | length > 0
  no_log: true
|
|
|
|
# ── 5. UFW rules ─────────────────────────────────────────────────────
|
|
# Adds a UFW allow rule for TCP traffic to the Grafana port whose source
# address is 127.0.0.1. The rule comment records that Grafana is meant to
# be reached through nginx.
- name: Allow Grafana from localhost only
  community.general.ufw:
    comment: "Grafana (via nginx)"
    rule: allow
    from_ip: 127.0.0.1
    proto: tcp
    port: "{{ grafana_port }}"
|
|
|
|
# ── 6. Start monitoring stack ────────────────────────────────────────
|
|
# Brings up the monitoring services with `docker compose up -d`, executed
# from the project directory.
#
# Uses the command module with `argv` and `chdir`: no shell features are
# needed, and this removes the redundant, unquoted `cd {{ project_dir }} &&`
# prefix that duplicated `chdir` and would break on paths with spaces.
#
# changed_when is forced to true, so this task always reports "changed".
- name: Start monitoring containers
  ansible.builtin.command:
    argv:
      - docker
      - compose
      - up
      - "-d"
      - prometheus
      - grafana
      - alertmanager
      - node-exporter
      - cadvisor
      - postgres-exporter
      - nginx-exporter
      - blackbox-exporter
    chdir: "{{ project_dir }}"
  changed_when: true
|