From ed0a44645ef19eba95a606014d3ec86a126e4966 Mon Sep 17 00:00:00 2001 From: justin Date: Fri, 29 May 2026 14:34:23 -0500 Subject: [PATCH] Email verifier: add catch-all domain detection via random probe Before checking the real address, the verifier sends RCPT TO for a random 20-character local part at the same domain. If the server accepts it (250), the domain is catch-all and individual verification is meaningless, so the address is reported as valid with reason "catch_all_detected". The result is cached per domain. Domains on the existing known catch-all list (gmail, outlook, etc.) still skip the SMTP check entirely. Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/workers/email_verifier.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/scripts/workers/email_verifier.py b/scripts/workers/email_verifier.py index d426eec..64cab33 100644 --- a/scripts/workers/email_verifier.py +++ b/scripts/workers/email_verifier.py @@ -55,6 +55,8 @@ CATCH_ALL_DOMAINS = { # Cache MX lookups to avoid repeated DNS queries _mx_cache: dict[str, list[str] | None] = {} +# Cache catch-all detection per domain +_catchall_cache: dict[str, bool] = {} def get_mx_hosts(domain: str) -> list[str] | None: @@ -99,11 +101,11 @@ def verify_email(email: str) -> tuple[bool, str]: if not mx_hosts: return False, "no_mx_records" - # Step 3: Catch-all domains — can't verify, assume valid + # Step 3: Known catch-all domains — can't verify, assume valid if domain in CATCH_ALL_DOMAINS: return True, "catch_all_domain" - # Step 4: SMTP handshake + # Step 4: SMTP handshake + catch-all detection for mx_host in mx_hosts[:2]: # Try first 2 MX servers try: with smtplib.SMTP(timeout=10) as smtp: @@ -112,6 +114,25 @@ def verify_email(email: str) -> tuple[bool, str]: code, _ = smtp.mail(OUR_EMAIL) if code != 250: continue + + # Step 4a: Check if domain is catch-all by sending a random address + if domain not in _catchall_cache: + import random, string + random_user = "".join(random.choices(string.ascii_lowercase, k=20)) + probe_code, _ = smtp.rcpt(f"{random_user}@{domain}") + _catchall_cache[domain] = probe_code 
== 250 + if _catchall_cache[domain]: + LOG.info("Catch-all detected: %s (accepts %s@%s)", domain, random_user, domain) + # Reset for real check + smtp.rset() + smtp.mail(OUR_EMAIL) + + if _catchall_cache.get(domain): + # Catch-all — accepts everything, can't verify individual address + smtp.quit() + return True, "catch_all_detected" + + # Step 4b: Check the actual email code, msg = smtp.rcpt(email) smtp.quit()