From 7670608c1a2e92bb9049fea0a025998416e9a6b1 Mon Sep 17 00:00:00 2001 From: justin Date: Sun, 7 Jun 2026 04:49:53 -0500 Subject: [PATCH] fix(monitoring): render alertmanager.yml from template at deploy (fixes crash loop) Alertmanager does not expand ${ENV} in its YAML, so the committed config with ${TELEGRAM_BOT_TOKEN}/${TELEGRAM_CHAT_ID} crash-looped it (line 24: cannot unmarshal !!str `${TELEG...` into int64) - 11k+ restarts on prod, alerting dead. - rename alertmanager.yml -> alertmanager.yml.template (keeps ${} placeholders) - deploy.sh: envsubst the template into the (gitignored) alertmanager.yml from .env, scoped to the two TELEGRAM vars so the {{ }} Go-template message survives - gitignore the rendered file (contains the bot token) - warns if the vars are unset --- .gitignore | 3 +++ deploy.sh | 16 ++++++++++++++++ ...lertmanager.yml => alertmanager.yml.template} | 0 3 files changed, 19 insertions(+) rename monitoring/{alertmanager.yml => alertmanager.yml.template} (100%) diff --git a/.gitignore b/.gitignore index 1a63043..ca6af6e 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,6 @@ site/dist/ site/.astro/ mcp/dist/ data/hc_warmup*.csv + +# Rendered from monitoring/alertmanager.yml.template by deploy.sh (contains secrets) +monitoring/alertmanager.yml diff --git a/deploy.sh b/deploy.sh index be95042..1f02876 100755 --- a/deploy.sh +++ b/deploy.sh @@ -31,6 +31,22 @@ echo "" echo "=== Syncing canonical site header (Services dropdown) ===" python3 scripts/sync_nav.py +# Render the Alertmanager config from its template. Alertmanager does NOT expand +# ${ENV} placeholders in its YAML, so the raw template (with ${TELEGRAM_BOT_TOKEN} +# / ${TELEGRAM_CHAT_ID}) crash-loops it ("cannot unmarshal !!str `${TELEG...`"). +# We substitute the real values here from .env at deploy time. Only those two +# vars are expanded so Alertmanager's own {{ }} Go-template message is untouched. 
+echo ""
+echo "=== Rendering monitoring/alertmanager.yml from template ==="
+if [ -f monitoring/alertmanager.yml.template ]; then
+  set -a; [ -f .env ] && . ./.env; set +a
+  envsubst '${TELEGRAM_BOT_TOKEN} ${TELEGRAM_CHAT_ID}' \
+    < monitoring/alertmanager.yml.template > monitoring/alertmanager.yml
+  # envsubst blanks unset vars, so check the variables, not the rendered file.
+  [ -n "${TELEGRAM_BOT_TOKEN:-}" ] && [ -n "${TELEGRAM_CHAT_ID:-}" ] ||
+    echo "WARN: TELEGRAM_BOT_TOKEN/TELEGRAM_CHAT_ID not set in .env; Alertmanager will crash-loop." >&2
+fi
+
 echo ""
 echo "=== Building: $SERVICES ==="
 # ERPNext bakes the custom Frappe apps into its image, so they must be staged
diff --git a/monitoring/alertmanager.yml b/monitoring/alertmanager.yml.template
similarity index 100%
rename from monitoring/alertmanager.yml
rename to monitoring/alertmanager.yml.template