The smtp_valid pool is only ~3k unsent — too small to sustain campaigns. SMTP probing can't confirm catch-all/mx_unreachable deliverability; only a REAL send can. burner_list_verify.py reconciles a verification send from a DISPOSABLE burner domain (isolated from PW/carrierone reputation): - hard bounce -> fmcsa_carriers.email_verify_result='hard_bounced' (excluded) - delivered -> 'send_confirmed' (proven deliverable; PW campaigns send to it) It tails the burner MTA mail.log (reuses bounce-watcher's status= pattern) and writes back idempotently. The PW trucking filter now treats smtp_valid + send_confirmed as sendable. docs/campaign-deliverability-plan.md captures the full diagnosis, the burner design, and CAN-SPAM guardrails. Remaining (needs a domain + isolated MTA identity — operator/infra decision): stand up the burner domain, the verification-send worker, and a writeback cron.
157 lines
5.9 KiB
Python
157 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Burner-domain list verification: write deliverability back to fmcsa_carriers.
|
|
|
|
The SMTP-probe verifier (email_verifier.py) can't tell which catch-all /
|
|
mx_unreachable addresses actually deliver. The only ground truth is a REAL send.
|
|
We do that from a disposable burner sending domain (NOT performancewest.net /
|
|
carrierone.com — see docs/campaign-deliverability-plan.md) so the inevitable
|
|
bounces never touch PW's reputation. This script reconciles that send:
|
|
|
|
1. Scan the burner MTA's mail.log for messages FROM the burner sender.
|
|
2. Any recipient that hard-bounced -> fmcsa_carriers.email_verify_result =
|
|
'hard_bounced' (permanently excluded from PW campaigns).
|
|
3. Any recipient that was DELIVERED (status=sent, no later bounce) and is not
|
|
already smtp_valid -> 'send_confirmed' (proven deliverable; the PW
|
|
campaign filter treats smtp_valid + send_confirmed as sendable).
|
|
|
|
Idempotent: only upgrades soft/unknown results ('catch_all_*', 'mx_unreachable',
'smtp_temp_error', 'smtp_unknown_450', 'smtp_unknown_451') or NULL rows to
'send_confirmed', and only sets 'hard_bounced' on a real bounce. Never downgrades
an already-confirmed address except to mark a genuine bounce.
|
|
|
|
Usage:
|
|
python3 -m scripts.burner_list_verify --log /var/log/burner-mail.log
|
|
python3 -m scripts.burner_list_verify --log mail.log --dry-run
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
import psycopg2
|
|
|
|
# Connection string handed to psycopg2.connect(); read once at import time.
DATABASE_URL = os.getenv("DATABASE_URL", "")

# Sender(s) used by the burner verification campaign. Override via env when the
# burner domain is provisioned (e.g. BURNER_SENDERS="verify@listcheck-xyz.com").
# Comma-separated; entries are trimmed and lower-cased, empty entries dropped.
BURNER_SENDERS = {
    s.strip().lower()
    for s in os.getenv("BURNER_SENDERS", "").split(",")
    if s.strip()
}

# Queue ID of a Postfix log line, e.g. "postfix/smtp[1234]: 4F2A1B3C5D: ...".
# - The program tag accepts custom syslog names ("postfix-burner/smtp",
#   "postfix/submission/smtpd"), not just "postfix/<daemon>".
# - The ID class covers short IDs (upper-case hex) and long queue IDs
#   (enable_long_queue_ids=yes: digits plus upper/lower-case consonants).
#   Lower-case vowels are excluded so words such as "warning:" or "fatal:"
#   are never mistaken for a queue ID.
QID_RE = re.compile(r"postfix[^\s\[]*\[\d+\]: ([0-9A-Zb-df-hj-np-tv-z]+):")
FROM_RE = re.compile(r"from=<([^>]*)>")
# \b keeps this from matching inside "orig_to=<...>".
TO_RE = re.compile(r"\bto=<([^>]*)>")
STATUS_RE = re.compile(r"status=(\w+)")

# Results we are allowed to UPGRADE to 'send_confirmed'. We never overwrite an
# explicit smtp_valid (already best) or a hard_bounced (worse signal wins).
UPGRADABLE = (
    "catch_all_domain",
    "catch_all_detected",
    "mx_unreachable",
    "smtp_temp_error",
    "smtp_unknown_451",
    "smtp_unknown_450",
)
|
|
|
|
|
|
def scan_log(log_path: str) -> tuple[set[str], set[str]]:
    """Return (delivered_emails, bounced_emails) for burner-sender messages.

    Streams the log once. A queue ID is treated as a burner message once a
    line carrying ``from=<sender>`` with a sender in BURNER_SENDERS has been
    seen for it; delivery lines (``to=<...>`` plus ``status=...``) are only
    counted for queue IDs already recognised that way, i.e. the ``from=``
    line must precede the delivery line in the file.

    Args:
        log_path: Path of the mail log to read.

    Returns:
        Two sets of lower-cased recipient addresses: those with at least one
        ``status=sent`` and no ``status=bounced``, and those with at least
        one ``status=bounced``. Both are empty when BURNER_SENDERS is unset
        (an error is printed to stderr in that case).
    """
    if not BURNER_SENDERS:
        print("ERROR: set BURNER_SENDERS (e.g. verify@your-burner-domain.com)",
              file=sys.stderr)
        return set(), set()

    burner_qids: set[str] = set()
    delivered: set[str] = set()
    bounced: set[str] = set()

    # Undecodable bytes are dropped rather than aborting the scan; the fields
    # we parse (queue ID, addresses, status) are plain ASCII.
    with open(log_path, encoding="utf-8", errors="ignore") as f:
        for line in f:
            qm = QID_RE.search(line)
            if not qm:
                continue
            qid = qm.group(1)

            fm = FROM_RE.search(line)
            if fm and fm.group(1).lower() in BURNER_SENDERS:
                burner_qids.add(qid)

            if qid not in burner_qids:
                continue

            tm = TO_RE.search(line)
            sm = STATUS_RE.search(line)
            if not (tm and sm):
                continue

            rcpt = tm.group(1).lower()
            if not rcpt:
                # "to=<>" carries no address to write back.
                continue

            status = sm.group(1).lower()
            if status == "bounced":
                bounced.add(rcpt)
            elif status == "sent":
                delivered.add(rcpt)
            # Other statuses (e.g. deferred) are not a final verdict; ignore.

    # A bounce anywhere wins over a "sent" (deferred-then-bounced).
    delivered -= bounced
    return delivered, bounced
|
|
|
|
|
|
def writeback(delivered: set[str], bounced: set[str], dry_run: bool = False) -> dict:
    """Apply send_confirmed / hard_bounced to fmcsa_carriers.

    Addresses are matched against ``lower(email_address)``, so callers must
    pass lower-cased addresses.

    Args:
        delivered: Addresses proven deliverable by a real send.
        bounced: Addresses that hard-bounced.
        dry_run: When true, nothing is written and no database connection is
            opened; the returned counts are the number of addresses that
            would be submitted, not the number of rows that would change.

    Returns:
        ``{"confirmed": n, "bounced": m}``: rows updated to 'send_confirmed'
        and 'hard_bounced' respectively (address counts in dry-run mode).
    """
    stats = {"confirmed": 0, "bounced": 0}
    if not (delivered or bounced):
        return stats

    if dry_run:
        # No queries are issued in dry-run mode, so don't require a reachable
        # database just to report counts.
        stats["bounced"] = len(bounced)
        stats["confirmed"] = len(delivered)
        return stats

    conn = psycopg2.connect(DATABASE_URL)
    try:
        with conn.cursor() as cur:
            # Hard bounces: always mark (worst signal wins), excludes from PW
            # sends. One statement for the whole batch; rows already marked
            # are skipped so re-runs report 0.
            if bounced:
                cur.execute(
                    """UPDATE fmcsa_carriers
                          SET email_verify_result = 'hard_bounced',
                              email_verified = FALSE
                        WHERE lower(email_address) = ANY(%s)
                          AND email_verify_result IS DISTINCT FROM 'hard_bounced'""",
                    (sorted(bounced),),
                )
                stats["bounced"] = cur.rowcount

            # Delivered: upgrade soft/unknown results to send_confirmed. Runs
            # after the bounce update, so a row just marked hard_bounced is
            # not upgradable and stays bounced.
            if delivered:
                cur.execute(
                    """UPDATE fmcsa_carriers
                          SET email_verify_result = 'send_confirmed',
                              email_verified = TRUE
                        WHERE lower(email_address) = ANY(%s)
                          AND (email_verify_result IN %s
                               OR email_verify_result IS NULL)""",
                    (sorted(delivered), UPGRADABLE),
                )
                stats["confirmed"] = cur.rowcount
        conn.commit()
    finally:
        # Closing without commit discards any partial work on error.
        conn.close()
    return stats
|
|
|
|
|
|
def main() -> int:
    """CLI entry point: scan the burner mail log and write results back.

    Returns:
        Process exit status: 0 on success, 1 when the log file does not
        exist, 2 when BURNER_SENDERS is not configured.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--log", default="/var/log/burner-mail.log",
                    help="burner MTA mail.log to scan")
    ap.add_argument("--dry-run", action="store_true",
                    help="report counts without writing to the database")
    args = ap.parse_args()

    if not os.path.exists(args.log):
        print(f"log not found: {args.log}", file=sys.stderr)
        return 1

    # Without a configured sender nothing can be attributed to the burner
    # campaign; fail loudly instead of reporting a "successful" empty run.
    if not BURNER_SENDERS:
        print("ERROR: set BURNER_SENDERS (e.g. verify@your-burner-domain.com)",
              file=sys.stderr)
        return 2

    delivered, bounced = scan_log(args.log)
    print(f"burner scan: {len(delivered)} delivered, {len(bounced)} bounced")
    stats = writeback(delivered, bounced, dry_run=args.dry_run)
    tag = "[dry-run] " if args.dry_run else ""
    print(f"{tag}writeback: send_confirmed +{stats['confirmed']}, "
          f"hard_bounced +{stats['bounced']}")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())
|