Compare commits
3 commits
2e4388a803
...
4dc5690666
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4dc5690666 | ||
|
|
c183957939 | ||
|
|
a32a3b05a0 |
18 changed files with 368 additions and 7 deletions
|
|
@ -300,6 +300,11 @@ services:
|
|||
|
||||
listmonk:
|
||||
image: listmonk/listmonk:latest
|
||||
# Stable hostname so the Message-ID Listmonk derives from the container OS
|
||||
# hostname is perfwest.performancewest.net, NOT the random docker container
|
||||
# id -> @localhost.localdomain (a spam-score signal; see deliverability
|
||||
# runbook). Matches Listmonk's SMTP hello_hostname.
|
||||
hostname: perfwest.performancewest.net
|
||||
ports:
|
||||
- "9100:9000"
|
||||
environment:
|
||||
|
|
@ -335,6 +340,9 @@ services:
|
|||
# mynetworks 172.16/12). host.docker.internal is mapped for convenience.
|
||||
listmonk-hc:
|
||||
image: listmonk/listmonk:latest
|
||||
# Stable hostname -> Message-ID @perfwest.performancewest.net, not the random
|
||||
# container id -> @localhost.localdomain (spam-score signal). See listmonk above.
|
||||
hostname: perfwest.performancewest.net
|
||||
ports:
|
||||
- "9101:9000"
|
||||
extra_hosts:
|
||||
|
|
|
|||
11
infra/ansible/playbooks/deploy-mail-pipeline.yml
Normal file
11
infra/ansible/playbooks/deploy-mail-pipeline.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
---
# Standalone entry point that applies ONLY the mail-pipeline role (campaign
# crons, IP warmup/ramp helpers, bounce watchers). Re-run it after touching
# anything under infra/cron, infra/postfix, infra/monitoring, infra/systemd,
# or scripts/*bounce*.
#
# Usage: ansible-playbook playbooks/deploy-mail-pipeline.yml -i inventory/hosts.yml --ask-vault-pass
- name: Deploy mail-pipeline (campaign crons + warmup + bounce watchers)
  become: true
  hosts: pw
  roles:
    # The role is addressed by path, relative to this playbook's directory.
    - role: "{{ playbook_dir }}/../roles/mail-pipeline"
|
||||
|
|
@ -16,6 +16,7 @@
|
|||
# workers — Python job server + Ollama LLM
|
||||
# shkeeper — k3s + Helm + SHKeeper (crypto payments: BTC/ETH/USDC/Polygon/TRX/BNB/LTC)
|
||||
# mail — OpenDKIM signing for outbound Postfix mail (incl. Listmonk campaigns)
|
||||
# mail-pipeline — campaign cron builders + IP warmup/ramp + bounce watchers
|
||||
# nginx — nginx + certbot TLS for all domains + fail2ban
|
||||
|
||||
- name: Provision Performance West server
|
||||
|
|
@ -33,6 +34,7 @@
|
|||
- worker-crons
|
||||
- shkeeper
|
||||
- mail
|
||||
- mail-pipeline
|
||||
- nginx
|
||||
- monitoring
|
||||
- security-updates
|
||||
|
|
|
|||
7
infra/ansible/roles/mail-pipeline/defaults/main.yml
Normal file
7
infra/ansible/roles/mail-pipeline/defaults/main.yml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
---
# Defaults for the mail-pipeline role.
#
# Both values are normally supplied by the common/app roles' group_vars; they
# are repeated here so the role can also be applied on its own.
deploy_user: deploy
project_dir: /opt/performancewest
|
||||
14
infra/ansible/roles/mail-pipeline/handlers/main.yml
Normal file
14
infra/ansible/roles/mail-pipeline/handlers/main.yml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
---
# Handlers for the mail-pipeline role. "Reload systemd" is listed first so that
# a changed unit file is re-read before either watcher is restarted.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

# Trucking/main bounce watcher.
- name: Restart pw-bounce-watcher
  ansible.builtin.systemd:
    state: restarted
    name: pw-bounce-watcher.service

# Healthcare (hc) bounce watcher.
- name: Restart pw-hc-bounce-watcher
  ansible.builtin.systemd:
    state: restarted
    name: pw-hc-bounce-watcher.service
|
||||
119
infra/ansible/roles/mail-pipeline/tasks/main.yml
Normal file
119
infra/ansible/roles/mail-pipeline/tasks/main.yml
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
---
|
||||
# mail-pipeline role
|
||||
#
|
||||
# Codifies the outbound email-campaign pipeline that previously lived ONLY on
|
||||
# the host (none of this was in IaC before -- a fresh rebuild would have silently
|
||||
# shipped NO campaigns, NO IP warmup/ramp, and NO bounce processing):
|
||||
#
|
||||
# - /etc/cron.d/pw-* daily campaign builders + IP-warmup/ramp drivers
|
||||
# - /usr/local/bin/pw-* warmup/ramp/healthcheck helper scripts
|
||||
# - /usr/local/bin/postfix-*-bounce-notify.sh bounce watchers
|
||||
# - pw-bounce-watcher / pw-hc-bounce-watcher systemd watcher services
|
||||
#
|
||||
# The campaign BUILDER logic (scripts/build_*.py) is synced with the app/workers
|
||||
# code; this role only deploys the host-level glue (cron + helper scripts +
|
||||
# services). The OpenDKIM signing + mail.log logrotate live in the `mail` role.
|
||||
|
||||
# ── log + state dirs ────────────────────────────────────────────────────────
|
||||
# The deploy user CANNOT write /var/log, so the deploy-owned cron jobs log to
|
||||
# /opt/performancewest/logs. A missing dir makes the `>>` redirect fail before
|
||||
# the command runs (cron then mails the error to deploy@ -> self-bounce).
|
||||
# Deploy-owned cron jobs redirect their output into this directory, so it has
# to exist and be writable by the deploy user before any of them fire.
- name: Ensure deploy-owned cron log directory exists
  ansible.builtin.file:
    state: directory
    path: "{{ project_dir }}/logs"
    mode: "0775"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_user }}"
|
||||
|
||||
# ── warmup / ramp helper scripts (run as root: edit main.cf, restart cntrs) ──
|
||||
# Installs the root-run warmup/ramp/alert helpers into /usr/local/bin.
#
# Sources are resolved from role_path (infra/ansible/roles/mail-pipeline, i.e.
# four levels below the repo root) rather than playbook_dir: playbook_dir
# changes with the calling playbook, so the same relative path pointed at
# different directories depending on which playbook applied the role.
- name: Deploy mail warmup/ramp/healthcheck helper scripts
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../{{ item.src }}"
    dest: "/usr/local/bin/{{ item.dest }}"
    owner: root
    group: root
    mode: "0755"
  loop:
    - src: "infra/postfix/pw-mta-warmup.sh"
      dest: "pw-mta-warmup"
    - src: "infra/postfix/pw-listmonk-rampcap.sh"
      dest: "pw-listmonk-rampcap"
    - src: "infra/postfix/pw-hc-rampcap.sh"
      dest: "pw-hc-rampcap"
    - src: "infra/monitoring/pw-warmup-tg-alert.sh"
      dest: "pw-warmup-tg-alert"
|
||||
|
||||
# ── bounce watchers (tail mail.log -> Listmonk bounce webhook) ──────────────
|
||||
# Installs the two bounce-watcher scripts under the names the systemd units
# execute, and restarts both watchers when either script changes.
#
# Sources are resolved from role_path (four levels below the repo root) rather
# than playbook_dir, so the path does not depend on which playbook applied the
# role.
- name: Deploy bounce-watcher scripts
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../{{ item.src }}"
    dest: "/usr/local/bin/{{ item.dest }}"
    owner: root
    group: root
    mode: "0755"
  loop:
    - src: "scripts/bounce-watcher.sh"
      dest: "postfix-bounce-notify.sh"
    - src: "scripts/hc-bounce-watcher.sh"
      dest: "postfix-hc-bounce-notify.sh"
  notify:
    - Restart pw-bounce-watcher
    - Restart pw-hc-bounce-watcher
|
||||
|
||||
# Installs the watcher unit files; a change triggers a daemon-reload followed
# by a restart of both watchers.
#
# Sources are resolved from role_path (four levels below the repo root) rather
# than playbook_dir, so the path does not depend on which playbook applied the
# role.
- name: Deploy bounce-watcher systemd units
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../infra/systemd/{{ item }}"
    dest: "/etc/systemd/system/{{ item }}"
    owner: root
    group: root
    mode: "0644"
  loop:
    - pw-bounce-watcher.service
    - pw-hc-bounce-watcher.service
  notify:
    - Reload systemd
    - Restart pw-bounce-watcher
    - Restart pw-hc-bounce-watcher
|
||||
|
||||
# Makes sure both watchers are enabled at boot and currently running. The
# daemon_reload here lets freshly copied unit files be picked up on first run.
- name: Enable + start bounce-watcher services
  ansible.builtin.systemd:
    daemon_reload: true
    name: "{{ item }}"
    state: started
    enabled: true
  loop:
    - pw-bounce-watcher.service
    - pw-hc-bounce-watcher.service
|
||||
|
||||
# ── listmonk bounce-sync poller (host python, every 5 min via root crontab) ──
|
||||
# Installs the bounce-sync poller that the root crontab entry runs.
#
# The source is resolved from role_path (four levels below the repo root)
# rather than playbook_dir, so the path does not depend on which playbook
# applied the role.
- name: Deploy listmonk bounce-sync poller
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../scripts/listmonk-bounce-sync.py"
    dest: /usr/local/bin/listmonk-bounce-sync.py
    owner: root
    group: root
    mode: "0755"
|
||||
|
||||
# Managed crontab entry (keyed by `name`) that runs the poller every five
# minutes and appends its output to /var/log/bounce-sync.log.
- name: Schedule listmonk bounce-sync (root crontab, every 5 min)
  ansible.builtin.cron:
    job: "/usr/bin/python3 /usr/local/bin/listmonk-bounce-sync.py >> /var/log/bounce-sync.log 2>&1"
    minute: "*/5"
    name: listmonk-bounce-sync
|
||||
|
||||
# ── campaign + warmup cron.d files ──────────────────────────────────────────
|
||||
# These reference scripts/ in {{ project_dir }} and the docker compose stack, so
|
||||
# they are deployed verbatim from infra/cron/ (the canonical, reviewed copies).
|
||||
# Copies the reviewed cron.d files from infra/cron verbatim into /etc/cron.d.
#
# Sources are resolved from role_path (four levels below the repo root) rather
# than playbook_dir, so the path does not depend on which playbook applied the
# role.
- name: Deploy campaign + warmup cron.d files
  ansible.builtin.copy:
    src: "{{ role_path }}/../../../../infra/cron/{{ item }}"
    dest: "/etc/cron.d/{{ item }}"
    owner: root
    group: root
    mode: "0644"
  loop:
    - pw-trucking-campaign-builder
    - pw-ifta-campaign
    - pw-ucr-campaign
    - pw-hc-campaign
    - pw-hc-nppes
    - pw-hc-refresh
    - pw-mta-warmup
    - pw-listmonk-rampcap
    - pw-hc-rampcap
    - pw-ip-rehab
    - pw-warmup-tg-alert
|
||||
5
infra/cron/pw-hc-rampcap
Normal file
5
infra/cron/pw-hc-rampcap
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Healthcare (listmonk-hc) hourly send-cap ramp, kept in step with the HC IP
# warmup. It reads its own stamp file (/etc/postfix/hc-warmup-start) and writes
# its own database (listmonk_hc); listmonk-hc is restarted only when the cap
# actually changes.
# Helper source: infra/postfix/pw-hc-rampcap.sh, installed as /usr/local/bin/pw-hc-rampcap.
20 7 * * * root /usr/local/bin/pw-hc-rampcap >> /var/log/pw-hc-rampcap.log 2>&1
|
||||
|
|
@ -8,6 +8,11 @@
|
|||
# CMS data-lag window to ~2-3 days, so a provider who just completed their
|
||||
# revalidation stops being targeted faster (fewer "already done" replies).
|
||||
# Takes ~8 min. SAM is opt-in (--sam-pages); SAM exclusions rarely carry an NPI,
|
||||
# so OIG LEIE is the NPI-bearing exclusion source. Then prune-only removes newly-
|
||||
# Google-hosted and suppressed subscribers from the warmup lists.
|
||||
0 6 * * 1,3,5 deploy cd /opt/performancewest && python3 -u scripts/hc_data_refresh.py >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1 && python3 -u scripts/build_healthcare_campaigns_cron.py --prune-only >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1
|
||||
# so OIG LEIE is the NPI-bearing exclusion source. Pipeline:
|
||||
# 1. hc_data_refresh.py -- re-verify NPIs vs CMS/OIG + MX reclassify
|
||||
# 2. download CMS revalidation_base.csv (institutional revalidation dates)
|
||||
# 3. enrich_institutional_revalidation.py -- merge reval dates into the
|
||||
# institutional CSV consumed by the pw-hc-nppes builder
|
||||
# 4. build_healthcare_campaigns_cron.py --prune-only -- evict newly-Google-
|
||||
# hosted + suppressed subscribers from the warmup lists
|
||||
# curl runs with -fsS: -f makes an HTTP error a non-zero exit so the && chain
# stops instead of enriching from a saved error page, and -S keeps curl's error
# message on stderr (plain -s suppresses it, leaving nothing for the log).
0 6 * * 1,3,5 deploy cd /opt/performancewest && python3 -u scripts/hc_data_refresh.py >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1 && curl -fsS "https://data.cms.gov/sites/default/files/2026-05/96484587-20ec-4070-a4de-cd7de3ec0093/revalidation_base.csv" -o data/npi_build/revalidation_base.csv 2>>/opt/performancewest/logs/pw-hc-refresh.log && python3 -u scripts/enrich_institutional_revalidation.py data/hc_nppes_institutional_verified.csv data/npi_build/revalidation_base.csv data/hc_nppes_institutional_enriched.csv >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1 && python3 -u scripts/build_healthcare_campaigns_cron.py --prune-only >> /opt/performancewest/logs/pw-hc-refresh.log 2>&1
|
||||
|
|
|
|||
5
infra/cron/pw-ifta-campaign
Normal file
5
infra/cron/pw-ifta-campaign
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# IFTA quarterly-return reminder. Fires every weekday (Mon-Fri) at 07:45; the
# builder gates itself to the ~21-day window before each deadline
# (Apr30/Jul31/Oct31/Jan31), so real sends happen only 4 times/year.
# Shares the trucking sender plumbing and the same-day coupon.
# CAMPAIGN_IFTA_QUARTERLY_ID names the source/base campaign that gets cloned.
45 7 * * 1-5 deploy cd /opt/performancewest && docker compose exec -T -e CAMPAIGN_IFTA_QUARTERLY_ID=469 workers python3 -m scripts.build_ifta_quarterly_campaign --start-campaign >> /opt/performancewest/logs/pw-ifta-campaign.log 2>&1
|
||||
5
infra/cron/pw-listmonk-rampcap
Normal file
5
infra/cron/pw-listmonk-rampcap
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Trucking Listmonk hourly send-cap ramp (sliding window), kept in step with
# the Postfix IP warmup via the /etc/postfix/pw-warmup-start stamp. The
# listmonk container is restarted only when the cap actually changes.
# Helper source: infra/postfix/pw-listmonk-rampcap.sh, installed as
# /usr/local/bin/pw-listmonk-rampcap.
20 7 * * * root /usr/local/bin/pw-listmonk-rampcap >> /var/log/pw-listmonk-rampcap.log 2>&1
|
||||
5
infra/cron/pw-mta-warmup
Normal file
5
infra/cron/pw-mta-warmup
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Postfix outbound-IP warmup scheduler. Derives the active sending-IP rotation
# pool from the warmup start date in /etc/postfix/pw-warmup-start and reloads
# Postfix only if the pool changed.
# Helper source: infra/postfix/pw-mta-warmup.sh, installed as
# /usr/local/bin/pw-mta-warmup. Must run as root: it edits main.cf and reloads Postfix.
17 7 * * * root /usr/local/bin/pw-mta-warmup >> /var/log/pw-mta-warmup.log 2>&1
|
||||
4
infra/cron/pw-trucking-campaign-builder
Normal file
4
infra/cron/pw-trucking-campaign-builder
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# Build next day's trucking Listmonk campaigns daily at 08:00 UTC (3 AM EST).
# 4 TZ regions x {MCS-150 overdue, Inactive USDOT}. Runs inside the workers
# container; per-MX throttling + warmup ramp bound the actual volume.
#
# The job runs as deploy, which cannot write /var/log. The log therefore lives
# under /opt/performancewest/logs like the other deploy-owned jobs; with an
# unwritable target the `>>` redirect fails and the command never starts.
0 8 * * * deploy cd /opt/performancewest && docker compose exec -T workers python3 -m scripts.build_trucking_campaigns >> /opt/performancewest/logs/pw-trucking-campaign-builder.log 2>&1
|
||||
4
infra/cron/pw-ucr-campaign
Normal file
4
infra/cron/pw-ucr-campaign
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# UCR annual-registration reminder. Fires every weekday (Mon-Fri) at 07:50; the
# builder gates itself to the 30/12/4-business-day-before-Dec-31 touch windows,
# so real sends happen only ~3x/year.
# CAMPAIGN_UCR_ANNUAL_ID names the source/base campaign that gets cloned.
50 7 * * 1-5 deploy cd /opt/performancewest && docker compose exec -T -e CAMPAIGN_UCR_ANNUAL_ID=473 workers python3 -m scripts.build_ucr_annual_campaign --start-campaign >> /opt/performancewest/logs/pw-ucr-campaign.log 2>&1
|
||||
13
infra/systemd/pw-bounce-watcher.service
Normal file
13
infra/systemd/pw-bounce-watcher.service
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[Unit]
Description=Postfix bounce watcher -> Listmonk webhook
# Pull Postfix in and start only after it.
Wants=postfix.service
After=postfix.service

[Service]
User=root
ExecStart=/usr/local/bin/postfix-bounce-notify.sh
# Bring the watcher back 10 s after any exit.
RestartSec=10
Restart=always

[Install]
WantedBy=multi-user.target
|
||||
|
|
@ -70,11 +70,43 @@ DO_NOT_CONTACT_EMAILS: frozenset[str] = frozenset({
|
|||
"dave@dataspindle.com",
|
||||
})
|
||||
|
||||
# Defunct / legacy / satellite ISP mailbox domains. Cold-mailing these is pure
|
||||
# reputation drag: the mailboxes are overwhelmingly dead (the brand was shut down
|
||||
# or absorbed years ago and the addresses now hard-bounce) or the operator
|
||||
# (satellite / small rural ISP) aggressively defers cold B2B mail with poor
|
||||
# eventual delivery. Identified from our own Listmonk bounce table (top bounced
|
||||
# recipient domains) cross-checked against ISP status. NOTE: still-active large
|
||||
# consumer ISPs (comcast.net, charter.net, cox.net, centurylink.net) are
|
||||
# deliberately NOT here -- their bounces were the cold-IP/no-DKIM reputation
|
||||
# problem (now fixed), not dead mailboxes, and they carry real prospects.
|
||||
# Mailbox domains we never cold-mail, grouped by why they were added. Entries
# are alphabetical within each group; membership is all that matters at runtime.
DEAD_ISP_DOMAINS: frozenset[str] = frozenset(
    (
        # Defunct dial-up / early-ISP brands (mail shut down or vestigial)
        "adelphia.net", "cablespeed.com", "core.com", "earthlink.net",
        "excite.com", "juno.com", "localnet.com", "lycos.com",
        "mindspring.com", "netzero.com", "netzero.net", "peoplepc.com",
        "pldi.net", "ptsi.net", "voyager.net", "wmconnect.com",
        # CenturyLink / Qwest / Embarq legacy brands (migrated/abandoned)
        "centurytel.net", "citlink.net", "citynet.net", "embarqmail.com",
        "qwest.net", "qwestoffice.net",
        # Satellite (poor cold-mail deliverability, high defer/bounce)
        "dishmail.net", "hughes.net", "wildblue.net", "wildblueinternet.net",
        # Altice / Optimum / Suddenlink / Cablevision family (rural, aggressive defer)
        "bresnan.net", "cebridge.net", "optonline.net", "suddenlink.net",
        # WOW! / Knology, Mediacom, Insight, Atlantic Broadband/Breezeline, Cable One
        "atlanticbb.net", "breezeline.net", "cableone.com", "cableone.net",
        "insightbb.com", "knology.net", "mchsi.com", "wowway.com",
        # Small / rural regional ISPs (aggressive defer, low cold deliverability)
        "arvig.net", "consolidated.net", "fuse.net", "iowatelecom.net",
        "mhtc.net", "ncn.net", "netins.net", "new.rr.com", "tds.net",
        "windstream.net",
        # Alaska regional (satellite/long-haul, poor cold deliverability)
        # NOTE(review): gulftel.com looks like Gulf Telephone (Alabama) rather
        # than an Alaska ISP -- confirm which group it belongs in.
        "acsalaska.net", "alaska.net", "gci.net", "gulftel.com",
    )
)
|
||||
|
||||
# The full set of consumer domains we refuse to cold-mail. Extend here as we
|
||||
# discover other reputation-sensitive providers.
|
||||
# Union of every exclusion set. Each source set is listed exactly once; add a
# new set as one more `|` operand.
BLOCKED_EMAIL_DOMAINS: frozenset[str] = (
    YAHOO_FAMILY_DOMAINS
    | GOOGLE_CONSUMER_DOMAINS
    | MICROSOFT_CONSUMER_DOMAINS
    | DEAD_ISP_DOMAINS
    | DO_NOT_CONTACT_DOMAINS
)
|
||||
|
||||
|
||||
|
|
|
|||
105
scripts/_email_plaintext.py
Normal file
105
scripts/_email_plaintext.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Shared HTML -> plaintext conversion for outbound campaigns.
|
||||
|
||||
Every campaign we build was HTML-only (no plaintext MIME part). A missing
|
||||
text/plain alternative is a spam-score signal: legitimate bulk senders ship
|
||||
multipart/alternative, and several filters (and most "this looks like spam"
|
||||
heuristics) penalise HTML-only mail. It also degrades the experience for
|
||||
plaintext-only clients and accessibility tooling.
|
||||
|
||||
Listmonk only emits multipart/alternative when a campaign's `altbody` is set;
|
||||
otherwise it sends text/html alone. So we generate a readable plaintext
|
||||
rendition from the HTML body and pass it as `altbody`.
|
||||
|
||||
This is intentionally dependency-free (no bs4/html2text on the prod box): a
|
||||
small, well-tested regex pipeline that:
|
||||
- drops <script>/<style>/<head> blocks,
|
||||
- turns <a href=...>text</a> into "text (url)" so links survive,
|
||||
- maps <br>, </p>, </div>, <li>, headings, <tr> to newlines,
|
||||
- prefixes <li> with "- ",
|
||||
- strips all remaining tags,
|
||||
- unescapes HTML entities,
|
||||
- collapses runs of blank lines / trailing whitespace.
|
||||
|
||||
Listmonk template tags ({{ .Subscriber... }}, {{ UnsubscribeURL }}) are left
|
||||
untouched so they still render per-subscriber in the plaintext part too.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html as _html
|
||||
import re
|
||||
|
||||
__all__ = ["html_to_text"]
|
||||
|
||||
_RE_FLAGS = re.IGNORECASE | re.DOTALL

# Whole blocks whose *content* must be discarded, not just the tags.
_DROP_BLOCKS = re.compile(
    r"<(script|style|head|title|noscript)\b[^>]*>.*?</\1>", _RE_FLAGS
)
# HTML comments (Listmonk/MSO conditional comments etc.).
_COMMENTS = re.compile(r"<!--.*?-->", _RE_FLAGS)
# <a href="URL" ...>TEXT</a>. The closing quote must be the same character as
# the opening one, so an apostrophe inside a double-quoted URL (or a double
# quote inside a single-quoted one) does not cut the URL short.
_ANCHORS = re.compile(
    r'<a\b[^>]*?\bhref\s*=\s*(?P<quote>["\'])(?P<url>.*?)(?P=quote)[^>]*>'
    r"(?P<text>.*?)</a>",
    _RE_FLAGS,
)
# Tags that should become a line break.
_BR = re.compile(r"<br\s*/?>", re.IGNORECASE)
_BLOCK_END = re.compile(
    r"</(p|div|h[1-6]|tr|table|ul|ol|blockquote|section|header|footer)>",
    re.IGNORECASE,
)
_LI = re.compile(r"<li\b[^>]*>", re.IGNORECASE)
_HR = re.compile(r"<hr\s*/?>", re.IGNORECASE)
_ANY_TAG = re.compile(r"<[^>]+>")
# Three or more consecutive newlines (optionally padded with spaces/tabs).
_MANY_BLANKS = re.compile(r"\n[ \t]*\n[ \t]*(\n[ \t]*)+")
_MANY_SPACES = re.compile(r"[ \t]{2,}")


def _anchor_repl(m: "re.Match[str]") -> str:
    """Render one ``<a href=...>`` match as plaintext.

    Rules:
      - blank href or in-page anchor (``#...``): link text only;
      - ``mailto:`` / ``tel:``: the bare address (query string dropped),
        prefixed by the link text when that adds information;
      - anything else, including templated hrefs such as
        ``{{ UnsubscribeURL }}``: ``text (url)``, or just the URL when the
        text is empty or identical to it.
    """
    url = m.group("url").strip()
    text = _ANY_TAG.sub("", m.group("text")).strip()
    # A blank href carries nothing worth printing; avoid emitting "text ()".
    if not url or url.startswith("#"):
        return text
    if url.lower().startswith(("mailto:", "tel:")):
        addr = url.split(":", 1)[1].split("?", 1)[0]
        if text and text != addr:
            return f"{text} ({addr})"
        return addr
    if not text or text == url:
        return url
    return f"{text} ({url})"


def html_to_text(html: str) -> str:
    """Convert an HTML email body to a readable text/plain rendition.

    Listmonk template tags are preserved verbatim.

    Args:
        html: The HTML body; may be empty or ``None``.

    Returns:
        The plaintext rendition terminated by a single newline, or ``""`` when
        the input is empty or contains no visible text.
    """
    if not html:
        return ""
    s = _DROP_BLOCKS.sub("", html)
    s = _COMMENTS.sub("", s)
    # Anchors must be rewritten before the generic tag strip or the URLs are lost.
    s = _ANCHORS.sub(_anchor_repl, s)
    s = _HR.sub("\n----------\n", s)
    s = _BR.sub("\n", s)
    s = _LI.sub("\n- ", s)
    s = _BLOCK_END.sub("\n", s)
    s = _ANY_TAG.sub("", s)
    s = _html.unescape(s)
    # Normalise every line BEFORE collapsing blank runs: splitlines() absorbs
    # CRLF/CR endings and strip() clears source indentation and lines holding
    # only &nbsp;, all of which would otherwise hide blank lines from the
    # collapse below.
    lines = [_MANY_SPACES.sub(" ", line).strip() for line in s.splitlines()]
    s = _MANY_BLANKS.sub("\n\n", "\n".join(lines))
    text = s.strip()
    return f"{text}\n" if text else ""
|
||||
|
|
@ -61,6 +61,7 @@ REPLY_TO = "info@performancewest.net"
|
|||
# is the single source of truth shared with build_healthcare_campaigns.py.
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from build_healthcare_campaigns import SEGMENTS, template_path # noqa: E402
|
||||
from _email_plaintext import html_to_text # noqa: E402
|
||||
|
||||
|
||||
def load_suppressed() -> set[str]:
|
||||
|
|
@ -284,7 +285,7 @@ def ensure_campaign(seg_key: str, list_id: int) -> int:
|
|||
payload = {
|
||||
"name": dated, "subject": seg["subject"], "lists": [list_id],
|
||||
"from_email": FROM_EMAIL, "type": "regular", "content_type": "richtext",
|
||||
"body": body, "messenger": "email",
|
||||
"body": body, "altbody": html_to_text(body), "messenger": "email",
|
||||
"tags": ["healthcare", "warmup", seg_key],
|
||||
"headers": [{"Reply-To": REPLY_TO},
|
||||
{"List-Unsubscribe": "<{{ UnsubscribeURL }}>"},
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ if ROOT not in sys.path:
|
|||
sys.path.insert(0, ROOT)
|
||||
|
||||
from scripts._email_exclusions import BLOCKED_EMAIL_DOMAINS
|
||||
from scripts._email_plaintext import html_to_text
|
||||
|
||||
LOG = logging.getLogger("build_trucking_campaigns")
|
||||
|
||||
|
|
@ -551,6 +552,21 @@ def import_subscribers(list_id: int, subscribers: list[dict]) -> int:
|
|||
return added
|
||||
|
||||
|
||||
def _altbody_for(base: dict, body: str | None = None) -> str:
|
||||
"""Plaintext (text/plain) part for a campaign.
|
||||
|
||||
Listmonk only emits multipart/alternative when altbody is set; HTML-only
|
||||
mail is a spam-score signal. The source/base campaigns have no altbody, so
|
||||
derive one from the HTML body. `body` overrides base["body"] for test sends
|
||||
where merge fields were already substituted.
|
||||
"""
|
||||
existing = (base.get("altbody") or "").strip()
|
||||
if existing:
|
||||
return existing
|
||||
html = body if body is not None else base.get("body", "")
|
||||
return html_to_text(html)
|
||||
|
||||
|
||||
def create_and_schedule_campaign(
|
||||
base: dict,
|
||||
list_id: int,
|
||||
|
|
@ -566,7 +582,7 @@ def create_and_schedule_campaign(
|
|||
"type": "regular",
|
||||
"content_type": base["content_type"],
|
||||
"body": base["body"],
|
||||
"altbody": base.get("altbody"),
|
||||
"altbody": _altbody_for(base),
|
||||
"template_id": base["template_id"],
|
||||
"tags": base.get("tags") or [],
|
||||
"messenger": base.get("messenger") or "email",
|
||||
|
|
@ -611,7 +627,7 @@ def send_test(base: dict, campaign_id: int, sample_row: tuple, label: str, tz: s
|
|||
"name": base.get("name", "Test"), "subject": subj,
|
||||
"lists": list_ids, "from_email": base["from_email"],
|
||||
"type": "regular", "content_type": base["content_type"],
|
||||
"body": body, "altbody": base.get("altbody"),
|
||||
"body": body, "altbody": _altbody_for(base, body),
|
||||
"template_id": base["template_id"],
|
||||
"tags": base.get("tags") or [], "messenger": base.get("messenger") or "email",
|
||||
"headers": base.get("headers") or REPLY_TO_HEADERS,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue