new-site/deploy.sh
justin 42b433db5a deploy: reset generated site files before pull (fixes silently-stranded commits)
deploy.sh ran sync_nav.py / gen-service-catalog.py which dirty site/public +
site/src in place; that made 'git pull' abort, so recent commits never reached
prod until pulled manually. Reset those generated paths before pulling so deploys
always fast-forward. Also document the IRP POA signer-name/title follow-up.
2026-06-16 05:28:45 -05:00

129 lines
6.5 KiB
Bash
Executable file

#!/usr/bin/env bash
# Deploy latest code from git and rebuild containers.
#
# Usage:
#   ./deploy.sh               (rebuilds site, api, workers; also (re)starts
#                              the image-only proxy-relay and listmonk-hc)
#   ./deploy.sh site          (rebuilds only site)
#   ./deploy.sh api           (rebuilds only api)
#   ./deploy.sh erpnext       (rebuild + migrate ERPNext, re-extract assets)
#   ./deploy.sh api workers   (rebuild a custom set)
#
# Strict mode: abort on any unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Every relative path used below (scripts/, monitoring/, erpnext/, .env,
# ./extract-erpnext-assets.sh) is resolved against this directory.
cd /opt/performancewest
# Services to act on: the command-line arguments, or the default set when none
# are given. "$*" (not "$@") because the list is deliberately stored as ONE
# space-separated string that later steps word-split.
SERVICES="${*:-site api workers proxy-relay listmonk-hc}"
# proxy-relay and listmonk-hc are upstream images (no build context), so they
# are filtered out of the build list below but stay in SERVICES (the `up` set)
# so the healthcare proxy sidecar and the healthcare-stream Listmonk run.
# NB: listmonk-hc needs a one-time DB setup the first time it is deployed:
#   docker compose exec api-postgres psql -U pw -d postgres -c 'CREATE DATABASE listmonk_hc OWNER pw;'
#   docker compose run --rm --entrypoint /bin/sh listmonk-hc -c './listmonk --install --idempotent --yes --config /listmonk/config.toml'
# then configure its 3 SMTP servers (hc ports 2526/2527/2528). See
# docs/healthcare-email-stream-plan.md.

# Print the space-separated service list $1 with the image-only services
# (proxy-relay, listmonk-hc) removed. Each remaining name is followed by a
# single space; the output is empty when nothing is left to build.
buildable_services() {
  # grep exits 1 when it selects no lines. Under `set -o pipefail` that would
  # fail the whole pipeline (and, via `set -e`, abort the deploy) whenever only
  # image-only services were requested. Treat "no lines" as success while still
  # propagating real grep errors (exit status >= 2).
  printf '%s\n' "$1" \
    | tr ' ' '\n' \
    | { grep -vE '^(proxy-relay|listmonk-hc)$' || [ "$?" -eq 1 ]; } \
    | tr '\n' ' '
}
BUILD_SERVICES="$(buildable_services "$SERVICES")"
printf '%s\n' "=== Pulling latest from git ==="
# Later deploy steps (sync_nav.py, gen-service-catalog.py) regenerate files
# under site/public and site/src in place, which leaves the working tree dirty.
# A dirty tree makes `git pull` abort ("local changes would be overwritten"),
# so new commits would silently never reach this checkout. Throw away the
# generated changes first; only those two generated paths are reset.
if ! git checkout -- site/public site/src 2>/dev/null; then
  : # best-effort: a failed reset must not stop the deploy
fi
git pull origin main
# The site header has one canonical source, site/src/partials/nav.html. This
# step rewrites the <nav> block of every static page from it so the Services
# dropdown is identical on the static site, the Astro order pages, and dev.
# It is idempotent and a no-op when everything is already in sync.
# (See scripts/sync_nav.py.)
printf '\n%s\n' "=== Syncing canonical site header (Services dropdown) ==="
python3 scripts/sync_nav.py
# Service pricing has one authority: the API catalog
# (api/src/service-catalog.ts), which is what checkout actually charges.
# The site's docker build context is ./site only and cannot read ../api, so
# site/src/lib/service-catalog.generated.ts is generated here on the host,
# before the docker build, to keep displayed prices == charged prices.
# (Python because the prod box has python3 but not node; same choice as
# scripts/sync_nav.py.)
printf '\n%s\n' "=== Generating site service catalog from API source ==="
python3 scripts/gen-service-catalog.py
# Follow-up drift check; under `set -e` a non-zero exit stops the deploy.
python3 scripts/check-service-catalog-drift.py
# Produce monitoring/alertmanager.yml from its template. Alertmanager does NOT
# expand ${ENV} placeholders in its YAML, so shipping the raw template (with
# ${TELEGRAM_BOT_TOKEN} / ${TELEGRAM_CHAT_ID}) makes it crash-loop
# ("cannot unmarshal !!str `${TELEG...`"). The real values are substituted
# here from .env at deploy time. Only those two variables are expanded, so
# Alertmanager's own {{ }} Go-template message is left untouched.
# NB: the two keys are extracted individually (no `source .env`) because .env
# holds values with shell-hostile chars (e.g. SMTP_PASS) that break `. ./.env`.
printf '\n%s\n' "=== Rendering monitoring/alertmanager.yml from template ==="
if [[ -f monitoring/alertmanager.yml.template ]]; then
  # Print whatever follows `KEY=` on the first .env line starting with the
  # key named by $1 (prints nothing when no such line exists).
  get_env() {
    sed -n "s/^$1=//p" .env | head -n1
  }

  TELEGRAM_BOT_TOKEN="$(get_env TELEGRAM_BOT_TOKEN)"
  TELEGRAM_CHAT_ID="$(get_env TELEGRAM_CHAT_ID)"
  # envsubst reads the values from the environment, so both must be exported.
  export TELEGRAM_BOT_TOKEN
  export TELEGRAM_CHAT_ID

  # The single-quoted argument is envsubst's variable allow-list; it must reach
  # envsubst unexpanded.
  # shellcheck disable=SC2016
  envsubst '${TELEGRAM_BOT_TOKEN} ${TELEGRAM_CHAT_ID}' \
    < monitoring/alertmanager.yml.template \
    > monitoring/alertmanager.yml

  # Warn (but do not fail) when either value was empty or a ${TELEGRAM...}
  # placeholder is still present in the rendered file.
  if [[ -z "$TELEGRAM_BOT_TOKEN" || -z "$TELEGRAM_CHAT_ID" ]] \
    || grep -q '\${TELEGRAM' monitoring/alertmanager.yml; then
    printf '%s\n' "WARN: TELEGRAM_BOT_TOKEN/TELEGRAM_CHAT_ID missing in .env; Alertmanager will crash-loop." >&2
  fi
fi
printf '\n%s\n' "=== Building: $SERVICES ==="
# The ERPNext image bakes in the custom Frappe apps, so they have to be staged
# from the repo into the build context (erpnext/<app>/) before building.
# Otherwise `docker compose build erpnext` would use stale app copies and
# silently ship old code (e.g. the set-password controller rename would never
# take effect).
if [[ " $SERVICES " == *" erpnext "* ]]; then
  echo "--- staging custom Frappe apps ---"
  bash erpnext/build.sh
fi
# Skip the build entirely when the build list is empty or only spaces.
# The list is a space-separated string, so it is word-split on purpose.
if [[ -n "${BUILD_SERVICES// }" ]]; then
  # shellcheck disable=SC2086
  docker compose build $BUILD_SERVICES
fi
printf '\n%s\n' "=== Restarting: $SERVICES ==="
# shellcheck disable=SC2086
docker compose up -d $SERVICES
# ── ERPNext: migrate, then ALWAYS re-extract the host asset copy ─────────────
# Frappe emits content-hashed asset filenames; an ERPNext rebuild/migrate
# changes the hashes. If we don't re-sync the host copy that nginx serves for
# portal.performancewest.net, every asset 404s and the portal loses all CSS.
# So any time erpnext is (re)built we run bench migrate + re-extract assets.
case " $SERVICES " in
  *" erpnext "*)
    echo ""
    echo "=== ERPNext: bench migrate ==="
    # Try the site-scoped migrate first and fall back to a bare `bench migrate`.
    # Migration stays best-effort (the asset re-extraction below must still
    # run), but a double failure is reported on stderr instead of being
    # silently swallowed.
    if ! docker compose exec -T erpnext bench --site performancewest.net migrate \
      && ! docker compose exec -T erpnext bench migrate; then
      echo "WARN: ERPNext bench migrate failed (site-scoped and fallback); continuing with asset extraction." >&2
    fi
    echo ""
    echo "=== ERPNext: re-extracting static assets for the portal ==="
    sudo ./extract-erpnext-assets.sh
    ;;
esac
printf '\n%s\n' "=== Clearing nginx cache ==="
# Both steps are best-effort: their stderr is discarded and a failure of
# either one does not stop the deploy.
sudo rm -rf /var/cache/nginx/* 2>/dev/null || :
sudo nginx -s reload 2>/dev/null || :
# ── Portal asset drift guard ────────────────────────────────────────────────
# Inexpensive check that runs on EVERY deploy. When the portal's asset manifest
# names a login CSS bundle that does not exist in the host copy, the portal CSS
# is broken; detect that and self-heal by re-extracting. This covers drift from
# any source (out-of-band ERPNext restarts, image pulls, etc.).
if docker inspect performancewest-erpnext-1 >/dev/null 2>&1; then
  # First login.bundle.<HASH>.css name found in the container's assets.json;
  # left empty when the lookup fails or finds nothing.
  LOGIN_HASH="$(
    docker exec performancewest-erpnext-1 sh -c \
      "grep -o 'login.bundle.[A-Z0-9]*.css' /home/frappe/frappe-bench/sites/assets/assets.json | head -1" \
      2>/dev/null || true
  )"
  if [[ -n "$LOGIN_HASH" && ! -f "/opt/erpnext-assets/assets/frappe/dist/css/${LOGIN_HASH}" ]]; then
    printf '\n%s\n' "=== Portal asset drift detected (${LOGIN_HASH} missing) — re-extracting ==="
    sudo ./extract-erpnext-assets.sh
  fi
fi
echo ""
echo "=== Done ==="
# Final summary: the deployed commit and the first lines of container status.
git log --oneline -1
# The status table is informational only. It is the script's last command and
# `set -o pipefail` is active, so without the `|| true` a `docker compose ps`
# failure (including being cut off by `head` closing the pipe early) would make
# an otherwise successful deploy exit non-zero.
docker compose ps --format "table {{.Name}}\t{{.Status}}" | head -10 || true