Alertmanager does not expand ${ENV} placeholders in its YAML config, so the
committed config containing ${TELEGRAM_BOT_TOKEN}/${TELEGRAM_CHAT_ID} sent it
into a crash loop (line 24: cannot unmarshal !!str `${TELEG...` into int64).
Result: 11k+ restarts on prod, with alerting dead.
- rename alertmanager.yml -> alertmanager.yml.template (keeps ${} placeholders)
- deploy.sh: envsubst the template into the (gitignored) alertmanager.yml from
.env, scoped to the two TELEGRAM vars so the {{ }} Go-template message survives
- gitignore the rendered file (contains the bot token)
- warns if the vars are unset
40 lines
1 KiB
Text
40 lines
1 KiB
Text
# Alertmanager configuration template.
# The TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID placeholders are substituted at
# deploy time; Alertmanager itself does not expand environment variables.
# Do not spell other dollar-brace placeholders in this file (comments included).
global:
  resolve_timeout: 5m

# Every alert is delivered to the `telegram` receiver. The child routes only
# override how often a still-firing alert is re-sent, keyed on `severity`.
route:
  receiver: telegram
  group_by: [alertname, instance]
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  routes:
    # `matchers` replaces the deprecated `match` map.
    - matchers:
        - severity = "critical"
      receiver: telegram
      repeat_interval: 1h
    - matchers:
        - severity = "warning"
      receiver: telegram
      repeat_interval: 6h

receivers:
  - name: telegram
    telegram_configs:
      - bot_token: "${TELEGRAM_BOT_TOKEN}"
        # Intentionally unquoted: the rendered value must parse as an integer
        # (a string here fails with "cannot unmarshal !!str ... into int64").
        chat_id: ${TELEGRAM_CHAT_ID}
        parse_mode: HTML
        # Go-template body; the {{ }} actions must reach Alertmanager verbatim.
        message: |
          {{ if eq .Status "firing" }}🔴{{ else }}✅{{ end }} <b>{{ .Status | toUpper }}</b>
          {{ range .Alerts }}
          <b>{{ .Labels.alertname }}</b>
          {{ .Annotations.summary }}
          {{ if .Annotations.description }}<i>{{ .Annotations.description }}</i>{{ end }}
          {{ end }}
          <code>Server: pw-server | {{ .ExternalURL }}</code>

# Suppress warning alerts while a critical alert with the same alertname and
# instance is active. `source_matchers`/`target_matchers` replace the
# deprecated `source_match`/`target_match` maps.
inhibit_rules:
  - source_matchers:
      - severity = "critical"
    target_matchers:
      - severity = "warning"
    equal: [alertname, instance]