infra: codify the email-campaign pipeline in Ansible (new mail-pipeline role)
The entire outbound campaign pipeline lived ONLY on the host and was never in
IaC -- a fresh rebuild would have silently shipped NO campaigns, NO IP warmup/
ramp, and NO bounce processing. New mail-pipeline role + deploy-mail-pipeline.yml
playbook deploy it from the canonical repo copies:
cron.d (infra/cron/):
- pw-trucking-campaign-builder, pw-ifta-campaign, pw-ucr-campaign
- pw-hc-campaign, pw-hc-nppes, pw-hc-refresh
- pw-mta-warmup, pw-listmonk-rampcap, pw-hc-rampcap
- pw-ip-rehab, pw-warmup-tg-alert
helper scripts (-> /usr/local/bin):
- pw-mta-warmup, pw-listmonk-rampcap, pw-hc-rampcap, pw-warmup-tg-alert
- postfix-bounce-notify.sh, postfix-hc-bounce-notify.sh, listmonk-bounce-sync.py
systemd services:
- pw-bounce-watcher.service (was missing from repo), pw-hc-bounce-watcher.service
Also creates the deploy-owned {{project_dir}}/logs dir (deploy can't write
/var/log, so a missing dir made cron redirects fail). Added the 6 cron.d files
that existed only on the host, the trucking bounce-watcher unit, and synced
infra/cron/pw-hc-refresh to the live version (revalidation download + enrich
steps). Role wired into site.yml after the mail (OpenDKIM) role.
Part of the email-deliverability incident hardening.
This commit is contained in:
parent
c183957939
commit
4dc5690666
13 changed files with 202 additions and 3 deletions
11
infra/ansible/playbooks/deploy-mail-pipeline.yml
Normal file
11
infra/ansible/playbooks/deploy-mail-pipeline.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
---
# Standalone entry point that applies ONLY the mail-pipeline role: campaign
# crons, IP warmup/ramp helpers, and bounce watchers. Re-run it after touching
# anything under infra/cron, infra/postfix, infra/monitoring, infra/systemd,
# or scripts/*bounce*.
#
# Usage: ansible-playbook playbooks/deploy-mail-pipeline.yml -i inventory/hosts.yml --ask-vault-pass
- name: Deploy mail-pipeline (campaign crons + warmup + bounce watchers)
  become: true
  hosts: pw
  roles:
    # Referenced by path relative to this playbook's directory.
    - role: "{{ playbook_dir }}/../roles/mail-pipeline"
|
||||
|
|
@ -16,6 +16,7 @@
|
|||
# workers — Python job server + Ollama LLM
|
||||
# shkeeper — k3s + Helm + SHKeeper (crypto payments: BTC/ETH/USDC/Polygon/TRX/BNB/LTC)
|
||||
# mail — OpenDKIM signing for outbound Postfix mail (incl. Listmonk campaigns)
|
||||
# mail-pipeline — campaign cron builders + IP warmup/ramp + bounce watchers
|
||||
# nginx — nginx + certbot TLS for all domains + fail2ban
|
||||
|
||||
- name: Provision Performance West server
|
||||
|
|
@ -33,6 +34,7 @@
|
|||
- worker-crons
|
||||
- shkeeper
|
||||
- mail
|
||||
- mail-pipeline
|
||||
- nginx
|
||||
- monitoring
|
||||
- security-updates
|
||||
|
|
|
|||
7
infra/ansible/roles/mail-pipeline/defaults/main.yml
Normal file
7
infra/ansible/roles/mail-pipeline/defaults/main.yml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
---
# Defaults for the mail-pipeline role.
#
# Both values are normally supplied by the group_vars used by the common/app
# roles; they are repeated here so this role can also run on its own.

# Account that owns the cron log directory created by this role.
deploy_user: deploy

# Root of the deployed project checkout on the host.
project_dir: /opt/performancewest
|
||||
14
infra/ansible/roles/mail-pipeline/handlers/main.yml
Normal file
14
infra/ansible/roles/mail-pipeline/handlers/main.yml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
---
# Handlers for the mail-pipeline role. "Reload systemd" is listed first so that
# unit-file changes are picked up before either watcher is restarted.

- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart pw-bounce-watcher
  ansible.builtin.systemd:
    state: restarted
    name: pw-bounce-watcher.service

- name: Restart pw-hc-bounce-watcher
  ansible.builtin.systemd:
    state: restarted
    name: pw-hc-bounce-watcher.service
|
||||
119
infra/ansible/roles/mail-pipeline/tasks/main.yml
Normal file
119
infra/ansible/roles/mail-pipeline/tasks/main.yml
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
---
|
||||
# mail-pipeline role
|
||||
#
|
||||
# Codifies the outbound email-campaign pipeline that previously lived ONLY on
|
||||
# the host (none of this was in IaC before -- a fresh rebuild would have silently
|
||||
# shipped NO campaigns, NO IP warmup/ramp, and NO bounce processing):
|
||||
#
|
||||
# - /etc/cron.d/pw-* daily campaign builders + IP-warmup/ramp drivers
|
||||
# - /usr/local/bin/pw-* warmup/ramp/healthcheck helper scripts
|
||||
# - /usr/local/bin/postfix-*-bounce-notify.sh bounce watchers
|
||||
# - pw-bounce-watcher / pw-hc-bounce-watcher systemd watcher services
|
||||
#
|
||||
# The campaign BUILDER logic (scripts/build_*.py) is synced with the app/workers
|
||||
# code; this role only deploys the host-level glue (cron + helper scripts +
|
||||
# services). The OpenDKIM signing + mail.log logrotate live in the `mail` role.
|
||||
|
||||
# ── log + state dirs ────────────────────────────────────────────────────────
|
||||
# The deploy user CANNOT write /var/log, so the deploy-owned cron jobs log to
|
||||
# /opt/performancewest/logs. A missing dir makes the `>>` redirect fail before
|
||||
# the command runs (cron then mails the error to deploy@ -> self-bounce).
|
||||
# Creates <project_dir>/logs owned by the deploy user so cron jobs that run as
# that user have somewhere writable to append their output.
- name: Ensure deploy-owned cron log directory exists
  ansible.builtin.file:
    state: directory
    path: "{{ project_dir }}/logs"
    mode: "0775"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_user }}"
|
||||
|
||||
# ── warmup / ramp helper scripts (run as root: edit main.cf, restart cntrs) ──
|
||||
# Copies the root-owned helper scripts into /usr/local/bin, dropping the .sh
# suffix so the installed name matches what the cron.d entries invoke.
#
# Sources are anchored on role_path rather than playbook_dir: this role lives
# at infra/ansible/roles/mail-pipeline, so four levels up is the repository
# root no matter which playbook directory the run was started from. (With
# playbook_dir, playbooks/deploy-mail-pipeline.yml resolved to infra/infra/...)
- name: Deploy mail warmup/ramp/healthcheck helper scripts
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../{{ item.src }}"
    dest: "/usr/local/bin/{{ item.dest }}"
    owner: root
    group: root
    mode: "0755"
  loop:
    - src: infra/postfix/pw-mta-warmup.sh
      dest: pw-mta-warmup
    - src: infra/postfix/pw-listmonk-rampcap.sh
      dest: pw-listmonk-rampcap
    - src: infra/postfix/pw-hc-rampcap.sh
      dest: pw-hc-rampcap
    - src: infra/monitoring/pw-warmup-tg-alert.sh
      dest: pw-warmup-tg-alert
|
||||
|
||||
# ── bounce watchers (tail mail.log -> Listmonk bounce webhook) ──────────────
|
||||
# Installs the two bounce-watcher scripts under the names the systemd units
# execute, and restarts both watcher services whenever a script changes.
#
# Sources are anchored on role_path rather than playbook_dir: this role lives
# at infra/ansible/roles/mail-pipeline, so four levels up is the repository
# root no matter which playbook directory the run was started from.
- name: Deploy bounce-watcher scripts
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../{{ item.src }}"
    dest: "/usr/local/bin/{{ item.dest }}"
    owner: root
    group: root
    mode: "0755"
  loop:
    - src: scripts/bounce-watcher.sh
      dest: postfix-bounce-notify.sh
    - src: scripts/hc-bounce-watcher.sh
      dest: postfix-hc-bounce-notify.sh
  notify:
    - Restart pw-bounce-watcher
    - Restart pw-hc-bounce-watcher
|
||||
|
||||
# Installs the watcher unit files from infra/systemd/ and, on change, reloads
# systemd and restarts both watchers.
#
# Sources are anchored on role_path rather than playbook_dir: this role lives
# at infra/ansible/roles/mail-pipeline, so four levels up is the repository
# root no matter which playbook directory the run was started from.
- name: Deploy bounce-watcher systemd units
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../infra/systemd/{{ item }}"
    dest: "/etc/systemd/system/{{ item }}"
    owner: root
    group: root
    mode: "0644"
  loop:
    - pw-bounce-watcher.service
    - pw-hc-bounce-watcher.service
  notify:
    - Reload systemd
    - Restart pw-bounce-watcher
    - Restart pw-hc-bounce-watcher
|
||||
|
||||
# Makes sure both watcher services are enabled at boot and currently running.
# daemon_reload is requested here so freshly copied unit files are visible to
# systemd before the start is attempted.
- name: Enable + start bounce-watcher services
  ansible.builtin.systemd:
    daemon_reload: true
    name: "{{ item }}"
    state: started
    enabled: true
  loop:
    - pw-bounce-watcher.service
    - pw-hc-bounce-watcher.service
|
||||
|
||||
# ── listmonk bounce-sync poller (host python, every 5 min via root crontab) ──
|
||||
# Installs the bounce-sync poller script that the cron entry below runs with
# the host python3.
#
# The source is anchored on role_path rather than playbook_dir: this role lives
# at infra/ansible/roles/mail-pipeline, so four levels up is the repository
# root no matter which playbook directory the run was started from.
- name: Deploy listmonk bounce-sync poller
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../scripts/listmonk-bounce-sync.py"
    dest: /usr/local/bin/listmonk-bounce-sync.py
    owner: root
    group: root
    mode: "0755"
|
||||
|
||||
# Registers a named crontab entry that runs the poller every five minutes and
# appends stdout + stderr to /var/log/bounce-sync.log. No `user` is set, so the
# entry lands in the crontab of the account the task runs as.
- name: Schedule listmonk bounce-sync (root crontab, every 5 min)
  ansible.builtin.cron:
    job: "/usr/bin/python3 /usr/local/bin/listmonk-bounce-sync.py >> /var/log/bounce-sync.log 2>&1"
    minute: "*/5"
    name: listmonk-bounce-sync
|
||||
|
||||
# ── campaign + warmup cron.d files ──────────────────────────────────────────
|
||||
# These reference scripts/ in {{ project_dir }} and the docker compose stack, so
|
||||
# they are deployed verbatim from infra/cron/ (the canonical, reviewed copies).
|
||||
# Copies each cron.d file unchanged from infra/cron/ to /etc/cron.d/ under the
# same name, root-owned and mode 0644.
#
# Sources are anchored on role_path rather than playbook_dir: this role lives
# at infra/ansible/roles/mail-pipeline, so four levels up is the repository
# root no matter which playbook directory the run was started from.
- name: Deploy campaign + warmup cron.d files
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../infra/cron/{{ item }}"
    dest: "/etc/cron.d/{{ item }}"
    owner: root
    group: root
    mode: "0644"
  loop:
    - pw-trucking-campaign-builder
    - pw-ifta-campaign
    - pw-ucr-campaign
    - pw-hc-campaign
    - pw-hc-nppes
    - pw-hc-refresh
    - pw-mta-warmup
    - pw-listmonk-rampcap
    - pw-hc-rampcap
    - pw-ip-rehab
    - pw-warmup-tg-alert
|
||||
5
infra/cron/pw-hc-rampcap
Normal file
5
infra/cron/pw-hc-rampcap
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Ramp the healthcare listmonk-hc hourly send cap in lockstep with the HC IP
|
||||
# warmup, driven off the SEPARATE /etc/postfix/hc-warmup-start stamp and writing
|
||||
# the SEPARATE listmonk_hc DB. Restarts listmonk-hc only when the cap changes.
|
||||
# Helper: infra/postfix/pw-hc-rampcap.sh -> /usr/local/bin/pw-hc-rampcap.
|
||||
# cron.d fields: minute hour day-of-month month day-of-week user command.
# Every day at 07:20, as root; stdout and stderr are appended to the log file.
20 7 * * * root /usr/local/bin/pw-hc-rampcap >> /var/log/pw-hc-rampcap.log 2>&1
|
||||
|
|
@ -8,6 +8,11 @@
|
|||
# CMS data-lag window to ~2-3 days, so a provider who just completed their
|
||||
# revalidation stops being targeted faster (fewer "already done" replies).
|
||||
# Takes ~8 min. SAM is opt-in (--sam-pages); SAM exclusions rarely carry an NPI,
|
||||
# so OIG LEIE is the NPI-bearing exclusion source. Then prune-only removes newly-
|
||||
# Google-hosted and suppressed subscribers from the warmup lists.
|
||||
0 6 * * 1,3,5 deploy cd /opt/performancewest && python3 -u scripts/hc_data_refresh.py >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1 && python3 -u scripts/build_healthcare_campaigns_cron.py --prune-only >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1
|
||||
# so OIG LEIE is the NPI-bearing exclusion source. Pipeline:
|
||||
# 1. hc_data_refresh.py -- re-verify NPIs vs CMS/OIG + MX reclassify
|
||||
# 2. download CMS revalidation_base.csv (institutional revalidation dates)
|
||||
# 3. enrich_institutional_revalidation.py -- merge reval dates into the
|
||||
# institutional CSV consumed by the pw-hc-nppes builder
|
||||
# 4. build_healthcare_campaigns_cron.py --prune-only -- evict newly-Google-
|
||||
# hosted + suppressed subscribers from the warmup lists
|
||||
# Mon/Wed/Fri at 06:00, as deploy. Steps are chained with && so any failure
# stops the pipeline. curl uses -f so an HTTP error fails the chain instead of
# saving the error page as the CSV, and -S so the error reaches the log.
0 6 * * 1,3,5 deploy cd /opt/performancewest && python3 -u scripts/hc_data_refresh.py >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1 && curl -fsS "https://data.cms.gov/sites/default/files/2026-05/96484587-20ec-4070-a4de-cd7de3ec0093/revalidation_base.csv" -o data/npi_build/revalidation_base.csv 2>>/opt/performancewest/logs/pw-hc-refresh.log && python3 -u scripts/enrich_institutional_revalidation.py data/hc_nppes_institutional_verified.csv data/npi_build/revalidation_base.csv data/hc_nppes_institutional_enriched.csv >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1 && python3 -u scripts/build_healthcare_campaigns_cron.py --prune-only >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1
|
||||
|
|
|
|||
5
infra/cron/pw-ifta-campaign
Normal file
5
infra/cron/pw-ifta-campaign
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# IFTA quarterly-return reminder. Runs every weekday; the builder self-gates to the
|
||||
# ~21-day-before-deadline window (Apr30/Jul31/Oct31/Jan31), so it only actually
|
||||
# sends 4 times/year. Reuses the trucking sender plumbing + same-day coupon.
|
||||
# CAMPAIGN_IFTA_QUARTERLY_ID is the source/base campaign to clone.
|
||||
# cron.d fields: minute hour day-of-month month day-of-week user command.
# Mon-Fri at 07:45, as deploy; runs the builder in the `workers` compose service
# (exec -T, base campaign id passed via -e) and appends output to the log file.
45 7 * * 1-5 deploy cd /opt/performancewest && docker compose exec -T -e CAMPAIGN_IFTA_QUARTERLY_ID=469 workers python3 -m scripts.build_ifta_quarterly_campaign --start-campaign >> /opt/performancewest/logs/pw-ifta-campaign.log 2>&1
|
||||
5
infra/cron/pw-listmonk-rampcap
Normal file
5
infra/cron/pw-listmonk-rampcap
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Ramp the trucking Listmonk hourly send cap (sliding window) in lockstep with
|
||||
# the Postfix IP warmup, driven off /etc/postfix/pw-warmup-start. Restarts the
|
||||
# listmonk container only when the cap changes. Helper:
|
||||
# infra/postfix/pw-listmonk-rampcap.sh -> /usr/local/bin/pw-listmonk-rampcap.
|
||||
# cron.d fields: minute hour day-of-month month day-of-week user command.
# Every day at 07:20, as root; stdout and stderr are appended to the log file.
20 7 * * * root /usr/local/bin/pw-listmonk-rampcap >> /var/log/pw-listmonk-rampcap.log 2>&1
|
||||
5
infra/cron/pw-mta-warmup
Normal file
5
infra/cron/pw-mta-warmup
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Postfix outbound-IP warmup scheduler. Recomputes the active sending-IP
|
||||
# rotation pool from the warmup start date (/etc/postfix/pw-warmup-start) and
|
||||
# reloads Postfix only when it changes. Helper: infra/postfix/pw-mta-warmup.sh
|
||||
# -> /usr/local/bin/pw-mta-warmup. Runs as root (edits main.cf + postfix reload).
|
||||
# cron.d fields: minute hour day-of-month month day-of-week user command.
# Every day at 07:17, as root; stdout and stderr are appended to the log file.
17 7 * * * root /usr/local/bin/pw-mta-warmup >> /var/log/pw-mta-warmup.log 2>&1
|
||||
4
infra/cron/pw-trucking-campaign-builder
Normal file
4
infra/cron/pw-trucking-campaign-builder
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# Build next day's trucking Listmonk campaigns daily at 08:00 UTC (3 AM EST).
|
||||
# 4 TZ regions x {MCS-150 overdue, Inactive USDOT}. Runs inside the workers
|
||||
# container; per-MX throttling + warmup ramp bound the actual volume.
|
||||
# Runs as deploy, which cannot write /var/log; a failed `>>` redirect aborts the
# job before the builder starts. Log to the deploy-owned logs directory instead,
# matching the other deploy-run campaign crons.
0 8 * * * deploy cd /opt/performancewest && docker compose exec -T workers python3 -m scripts.build_trucking_campaigns >> /opt/performancewest/logs/pw-trucking-campaign-builder.log 2>&1
|
||||
4
infra/cron/pw-ucr-campaign
Normal file
4
infra/cron/pw-ucr-campaign
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# UCR annual-registration reminder. Runs every weekday; the builder self-gates to the
|
||||
# 30/12/4-business-day-before-Dec-31 touch windows, so it only sends ~3x/year.
|
||||
# CAMPAIGN_UCR_ANNUAL_ID is the source/base campaign to clone.
|
||||
# cron.d fields: minute hour day-of-month month day-of-week user command.
# Mon-Fri at 07:50, as deploy; runs the builder in the `workers` compose service
# (exec -T, base campaign id passed via -e) and appends output to the log file.
50 7 * * 1-5 deploy cd /opt/performancewest && docker compose exec -T -e CAMPAIGN_UCR_ANNUAL_ID=473 workers python3 -m scripts.build_ucr_annual_campaign --start-campaign >> /opt/performancewest/logs/pw-ucr-campaign.log 2>&1
|
||||
13
infra/systemd/pw-bounce-watcher.service
Normal file
13
infra/systemd/pw-bounce-watcher.service
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Long-running watcher that executes /usr/local/bin/postfix-bounce-notify.sh.
[Unit]
Description=Postfix bounce watcher -> Listmonk webhook
# Pull in Postfix and order this unit after it.
Wants=postfix.service
After=postfix.service

[Service]
User=root
ExecStart=/usr/local/bin/postfix-bounce-notify.sh
# Bring the watcher back 10 seconds after any exit.
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
|
||||
Loading…
Add table
Add a link
Reference in a new issue