From 1eb29f80beb987297fac1b8243fe99672b067f4e Mon Sep 17 00:00:00 2001 From: justin Date: Wed, 17 Jun 2026 05:48:08 -0500 Subject: [PATCH] fix(verifier): mx_unreachable was mislabeling live big-ISP mailboxes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The verifier returned (True, 'mx_unreachable') when it couldn't complete a port-25 probe to ANY MX — marking 438,163 addresses email_verified=TRUE. But these are NOT dead: they're dominated by Comcast (13.7k), AT&T/SBCGlobal (13.5k), Verizon, Cox, Charter, Frontier, etc. — major ISPs that deliberately tarpit/refuse probes from unknown IPs. Confirmed from prod: comcast MX connects + returns 220. The probe failure ≠ undeliverable. Fix: return (False, 'mx_probe_blocked') — MX exists, deliverability UNKNOWN, must be confirmed by a real send. Excluded from PW campaigns; prime burner-verification target (burner_list_verify upgrades it to send_confirmed on delivery). Existing 438,163 mx_unreachable rows reclassified in prod to mx_probe_blocked / verified=FALSE. --- scripts/burner_list_verify.py | 7 +++++-- scripts/workers/email_verifier.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/burner_list_verify.py b/scripts/burner_list_verify.py index dc5714b..110bf53 100644 --- a/scripts/burner_list_verify.py +++ b/scripts/burner_list_verify.py @@ -48,8 +48,11 @@ STATUS_RE = re.compile(r"status=(\w+)") # Results we are allowed to UPGRADE to 'send_confirmed'. We never overwrite an # explicit smtp_valid (already best) or a hard_bounced (worse signal wins). -UPGRADABLE = ("catch_all_domain", "catch_all_detected", "mx_unreachable", - "smtp_temp_error", "smtp_unknown_451", "smtp_unknown_450") +# 'mx_probe_blocked' is the big-ISP pool (Comcast/AT&T/Verizon/etc.) the SMTP +# probe couldn't reach — these are the prime burner-verification targets. +UPGRADABLE = ("catch_all_domain", "catch_all_detected", "mx_probe_blocked", + "mx_unreachable", "smtp_temp_error", "smtp_unknown_451", + "smtp_unknown_450") def scan_log(log_path: str) -> tuple[set[str], set[str]]: diff --git a/scripts/workers/email_verifier.py b/scripts/workers/email_verifier.py index f83933d..ccfa583 100644 --- a/scripts/workers/email_verifier.py +++ b/scripts/workers/email_verifier.py @@ -162,8 +162,15 @@ def verify_email(email: str) -> tuple[bool, str]: LOG.debug("SMTP error for %s via %s: %s", email, mx_host, e) continue - # Couldn't connect to any MX — domain exists but server unreachable - return True, "mx_unreachable" + # Could not complete an SMTP probe to ANY MX, even though valid MX records + # exist. This does NOT mean the address is dead — large providers (Comcast, + # AT&T/Yahoo, Verizon, Cox, Charter, etc.) deliberately tarpit / refuse port-25 + # probes from unknown IPs as an anti-spam measure, so the probe times out on + # millions of perfectly deliverable mailboxes. We therefore return + # email_verified=FALSE with 'mx_probe_blocked': the domain has mail servers but + # deliverability is UNKNOWN and must be confirmed by a real send (burner-domain + # verification → 'send_confirmed'). Campaigns must NOT treat this as sendable. + return False, "mx_probe_blocked" def verify_table(table: str, limit: int | None = None, dry_run: bool = False, where: str | None = None) -> dict: