Email verifier: add catch-all domain detection via random probe
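Before checking the real address, the verifier issues an RCPT TO for a random 20-character local part at the same domain. If the server accepts it (250), the domain is catch-all, so verifying an individual address is meaningless and the email is reported as valid with reason "catch_all_detected". The detection result is cached per domain. Domains on the existing known catch-all list (gmail, outlook, etc.) still return early, before any SMTP probe is sent. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>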
This commit is contained in:
parent
258e384f95
commit
ed0a44645e
1 changed file with 23 additions and 2 deletions
|
|
@ -55,6 +55,8 @@ CATCH_ALL_DOMAINS = {
|
|||
|
||||
# Cache MX lookups to avoid repeated DNS queries
|
||||
_mx_cache: dict[str, list[str] | None] = {}
|
||||
# Cache catch-all detection per domain
|
||||
_catchall_cache: dict[str, bool] = {}
|
||||
|
||||
|
||||
def get_mx_hosts(domain: str) -> list[str] | None:
|
||||
|
|
@ -99,11 +101,11 @@ def verify_email(email: str) -> tuple[bool, str]:
|
|||
if not mx_hosts:
|
||||
return False, "no_mx_records"
|
||||
|
||||
# Step 3: Catch-all domains — can't verify, assume valid
|
||||
# Step 3: Known catch-all domains — can't verify, assume valid
|
||||
if domain in CATCH_ALL_DOMAINS:
|
||||
return True, "catch_all_domain"
|
||||
|
||||
# Step 4: SMTP handshake
|
||||
# Step 4: SMTP handshake + catch-all detection
|
||||
for mx_host in mx_hosts[:2]: # Try first 2 MX servers
|
||||
try:
|
||||
with smtplib.SMTP(timeout=10) as smtp:
|
||||
|
|
@ -112,6 +114,25 @@ def verify_email(email: str) -> tuple[bool, str]:
|
|||
code, _ = smtp.mail(OUR_EMAIL)
|
||||
if code != 250:
|
||||
continue
|
||||
|
||||
# Step 4a: Check if domain is catch-all by sending a random address
|
||||
if domain not in _catchall_cache:
|
||||
import random, string
|
||||
random_user = "".join(random.choices(string.ascii_lowercase, k=20))
|
||||
probe_code, _ = smtp.rcpt(f"{random_user}@{domain}")
|
||||
_catchall_cache[domain] = probe_code == 250
|
||||
if _catchall_cache[domain]:
|
||||
LOG.info("Catch-all detected: %s (accepts %s@%s)", domain, random_user, domain)
|
||||
# Reset for real check
|
||||
smtp.rset()
|
||||
smtp.mail(OUR_EMAIL)
|
||||
|
||||
if _catchall_cache.get(domain):
|
||||
# Catch-all — accepts everything, can't verify individual address
|
||||
smtp.quit()
|
||||
return True, "catch_all_detected"
|
||||
|
||||
# Step 4b: Check the actual email
|
||||
code, msg = smtp.rcpt(email)
|
||||
smtp.quit()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue