feat(deliverability): burner-domain list verification + plan doc

The smtp_valid pool is only ~3k unsent — too small to sustain campaigns. SMTP probing can't confirm catch-all/mx_unreachable deliverability; only a REAL send can. burner_list_verify.py reconciles a verification send from a DISPOSABLE burner domain (isolated from PW/carrierone reputation): - hard bounce -> fmcsa_carriers.email_verify_result='hard_bounced' (excluded) - delivered -> 'send_confirmed' (proven deliverable; PW campaigns send to it) It tails the burner MTA mail.log (reuses bounce-watcher's status= pattern) and writes back idempotently. The PW trucking filter now treats smtp_valid + send_confirmed as sendable. docs/campaign-deliverability-plan.md captures the full diagnosis, the burner design, and CAN-SPAM guardrails. Remaining (needs a domain + isolated MTA identity — operator/infra decision): stand up the burner domain, the verification-send worker, and a writeback cron.
2026-06-16 22:28:24 -05:00 · 2026-06-16 22:28:24 -05:00 · c2737f2001
commit c2737f2001
parent 1652a3b8bc
3 changed files with 258 additions and 5 deletions
--- a/scripts/build_trucking_campaigns.py
+++ b/scripts/build_trucking_campaigns.py
@ -339,11 +339,13 @@ REPLY_TO_HEADERS = [{"name": "Reply-To", "value": REPLY_TO_EMAIL}]
 # blocklisted). So 'mx_unreachable' and all error/reject results are excluded.
 #
 # Recovery mode (default ON while reputation is damaged): send ONLY 'smtp_valid'
-# — addresses an MX explicitly accepted at RCPT time — to drive the bounce rate
-# to near-zero and rebuild sender reputation. Once recovered, set
-# CAMPAIGN_INCLUDE_CATCH_ALL=1 to re-add catch-all domains (which accept at SMTP
-# time but can still bounce later, so they stay out during recovery).
-_SENDABLE_RESULTS = ["smtp_valid"]
+# — addresses an MX explicitly accepted at RCPT time — plus 'send_confirmed'
+# (addresses proven deliverable by a real burner-domain verification send; see
+# docs/campaign-deliverability-plan.md). This drives the bounce rate to near-zero
+# and rebuilds sender reputation. Once recovered, set CAMPAIGN_INCLUDE_CATCH_ALL=1
+# to re-add catch-all domains (which accept at SMTP time but can still bounce
+# later, so they stay out during recovery). 'hard_bounced' is NEVER sendable.
+_SENDABLE_RESULTS = ["smtp_valid", "send_confirmed"]
 if os.getenv("CAMPAIGN_INCLUDE_CATCH_ALL", "0") not in ("0", "false", ""):
    _SENDABLE_RESULTS += ["catch_all_domain", "catch_all_detected"]
 USABLE_FILTER = (
--- a/scripts/burner_list_verify.py
+++ b/scripts/burner_list_verify.py
@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+"""Burner-domain list verification: write deliverability back to fmcsa_carriers.
+
+The SMTP-probe verifier (email_verifier.py) can't tell which catch-all /
+mx_unreachable addresses actually deliver. The only ground truth is a REAL send.
+We do that from a disposable burner sending domain (NOT performancewest.net /
+carrierone.com — see docs/campaign-deliverability-plan.md) so the inevitable
+bounces never touch PW's reputation. This script reconciles that send:
+
+  1. Scan the burner MTA's mail.log for messages FROM the burner sender.
+  2. Any recipient that hard-bounced  -> fmcsa_carriers.email_verify_result =
+     'hard_bounced'        (permanently excluded from PW campaigns).
+  3. Any recipient that was DELIVERED (status=sent, no later bounce) and is not
+     already smtp_valid     -> 'send_confirmed'  (proven deliverable; the PW
+     campaign filter treats smtp_valid + send_confirmed as sendable).
+
+Idempotent: only upgrades 'catch_all_*' / 'mx_unreachable' / temporary-SMTP-error
+('smtp_temp_error', 'smtp_unknown_450', 'smtp_unknown_451') / NULL rows to
+'send_confirmed', and only sets 'hard_bounced' on a real bounce. Never downgrades
+an already-confirmed address except to mark a genuine bounce.
+
+Usage:
+    python3 -m scripts.burner_list_verify --log /var/log/burner-mail.log
+    python3 -m scripts.burner_list_verify --log mail.log --dry-run
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import re
+import sys
+
+import psycopg2
+
+DATABASE_URL = os.getenv("DATABASE_URL", "")
+
+# Sender(s) used by the burner verification campaign, read from a comma-separated
+# env var set once the burner domain is provisioned (e.g. BURNER_SENDERS="verify@listcheck-xyz.com").
+BURNER_SENDERS = {
+    s.strip().lower()  # lowercased so log addresses can be compared case-insensitively
+    for s in os.getenv("BURNER_SENDERS", "").split(",")
+    if s.strip()  # skip blank entries; an unset variable yields an empty set
+}
+
+QID_RE = re.compile(r"postfix/\w+\[\d+\]: ([A-Z0-9]+):")  # queue ID following "postfix/<name>[<pid>]: "
+FROM_RE = re.compile(r"from=<([^>]*)>")  # address inside from=<...>
+TO_RE = re.compile(r"to=<([^>]*)>")  # address inside the first to=<...> on the line
+STATUS_RE = re.compile(r"status=(\w+)")  # status word, e.g. "sent" or "bounced"
+
+# Results we are allowed to UPGRADE to 'send_confirmed' (NULL is also upgraded, by
+# the query). We never overwrite smtp_valid (already best) or hard_bounced (worse signal wins).
+UPGRADABLE = ("catch_all_domain", "catch_all_detected", "mx_unreachable",
+              "smtp_temp_error", "smtp_unknown_451", "smtp_unknown_450")
+
+
+def scan_log(log_path: str) -> tuple[set[str], set[str]]:
+    """Return (delivered_emails, bounced_emails) for burner-sender messages.
+
+    The log is read line by line. A queue ID is treated as a burner message
+    once a line carrying that ID has a ``from=<...>`` address listed in
+    BURNER_SENDERS. Only lines for such queue IDs that contain both
+    ``to=<...>`` and ``status=...`` are counted, so a delivery line that
+    precedes its message's ``from=`` line is ignored.
+
+    Args:
+        log_path: Path of the mail log to scan.
+
+    Returns:
+        A ``(delivered, bounced)`` pair of lowercased recipient addresses.
+        ``delivered`` holds recipients seen with ``status=sent`` and
+        ``bounced`` those seen with ``status=bounced``; an address that
+        bounced at least once is never reported as delivered. Both sets are
+        empty (and an error is printed to stderr) when BURNER_SENDERS is not
+        configured.
+
+    Raises:
+        OSError: If the log file cannot be opened or read.
+    """
+    if not BURNER_SENDERS:
+        print("ERROR: set BURNER_SENDERS (e.g. verify@your-burner-domain.com)",
+              file=sys.stderr)
+        return set(), set()
+
+    burner_qids: set[str] = set()
+    delivered: set[str] = set()
+    bounced: set[str] = set()
+
+    # Decode explicitly as UTF-8 so the result does not depend on the locale
+    # the script happens to run under (e.g. cron); undecodable bytes are
+    # dropped rather than aborting the scan.
+    with open(log_path, encoding="utf-8", errors="ignore") as f:
+        for line in f:
+            qm = QID_RE.search(line)
+            if not qm:
+                continue
+            qid = qm.group(1)
+
+            # Remember every queue ID that originated from a burner sender.
+            fm = FROM_RE.search(line)
+            if fm and fm.group(1).lower() in BURNER_SENDERS:
+                burner_qids.add(qid)
+
+            if qid not in burner_qids:
+                continue
+            tm = TO_RE.search(line)
+            sm = STATUS_RE.search(line)
+            if not (tm and sm):
+                continue
+
+            rcpt = tm.group(1).lower()
+            status = sm.group(1).lower()
+            if status == "bounced":
+                bounced.add(rcpt)
+            elif status == "sent":
+                delivered.add(rcpt)
+            # Any other status (e.g. deferred) is not a final verdict.
+
+    # A bounce anywhere wins over a "sent" (deferred-then-bounced).
+    delivered -= bounced
+    return delivered, bounced
+
+
+def writeback(delivered: set[str], bounced: set[str], dry_run: bool = False) -> dict[str, int]:
+    """Apply send_confirmed / hard_bounced to fmcsa_carriers.
+
+    Args:
+        delivered: Lowercased addresses that were delivered.
+        bounced: Lowercased addresses that hard-bounced.
+        dry_run: When true, touch nothing (no database connection is opened)
+            and report how many addresses WOULD be processed.
+
+    Returns:
+        ``{"confirmed": n, "bounced": m}``. In a real run these are the
+        numbers of rows actually updated; in a dry run they are the sizes of
+        ``delivered`` and ``bounced``.
+
+    Raises:
+        psycopg2.Error: If connecting or an UPDATE fails. Nothing is
+            committed in that case because the connection is closed before
+            ``commit()`` is reached.
+    """
+    if dry_run:
+        # A dry run never writes, so it must not need database access either.
+        return {"confirmed": len(delivered), "bounced": len(bounced)}
+
+    stats = {"confirmed": 0, "bounced": 0}
+    if not (delivered or bounced):
+        return stats
+    conn = psycopg2.connect(DATABASE_URL)
+    try:
+        with conn.cursor() as cur:
+            # Hard bounces: always mark (worst signal wins), excludes from PW sends.
+            # The IS DISTINCT FROM guard keeps re-runs from re-counting rows.
+            for email in bounced:
+                cur.execute(
+                    """UPDATE fmcsa_carriers
+                          SET email_verify_result = 'hard_bounced',
+                              email_verified = FALSE
+                        WHERE lower(email_address) = %s
+                          AND email_verify_result IS DISTINCT FROM 'hard_bounced'""",
+                    (email,),
+                )
+                stats["bounced"] += cur.rowcount
+            # Delivered: upgrade soft/unknown results to send_confirmed. Rows
+            # already marked hard_bounced or smtp_valid are not in UPGRADABLE
+            # and are therefore left alone.
+            for email in delivered:
+                cur.execute(
+                    """UPDATE fmcsa_carriers
+                          SET email_verify_result = 'send_confirmed',
+                              email_verified = TRUE
+                        WHERE lower(email_address) = %s
+                          AND (email_verify_result IN %s OR email_verify_result IS NULL)""",
+                    (email, UPGRADABLE),
+                )
+                stats["confirmed"] += cur.rowcount
+        conn.commit()
+    finally:
+        conn.close()
+    return stats
+
+
+def main() -> int:
+    """Scan the burner mail log and write the results back to fmcsa_carriers.
+
+    Command-line flags:
+        --log      mail log to scan (default /var/log/burner-mail.log)
+        --dry-run  passed through to writeback(); nothing is committed
+
+    Returns:
+        Process exit status: 0 on success, 1 when the log file does not
+        exist, 2 when BURNER_SENDERS is not configured.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--log", default="/var/log/burner-mail.log",
+                    help="burner MTA mail.log to scan")
+    ap.add_argument("--dry-run", action="store_true")
+    args = ap.parse_args()
+
+    if not os.path.exists(args.log):
+        print(f"log not found: {args.log}", file=sys.stderr)
+        return 1
+    if not BURNER_SENDERS:
+        # Without a configured sender the scan can only return empty sets;
+        # exit non-zero so a scheduler notices the misconfiguration instead
+        # of seeing a "successful" run that confirmed nothing.
+        print("ERROR: set BURNER_SENDERS (e.g. verify@your-burner-domain.com)",
+              file=sys.stderr)
+        return 2
+    delivered, bounced = scan_log(args.log)
+    print(f"burner scan: {len(delivered)} delivered, {len(bounced)} bounced")
+    stats = writeback(delivered, bounced, dry_run=args.dry_run)
+    tag = "[dry-run] " if args.dry_run else ""
+    print(f"{tag}writeback: send_confirmed +{stats['confirmed']}, "
+          f"hard_bounced +{stats['bounced']}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())