new-site/scripts/_email_exclusions.py
justin b40fc7ec36 feat(deliverability): exclude Apple consumer mail + scrub stale consumer subs from Listmonk
The fmcsa campaign builders already exclude gmail/yahoo/microsoft/etc. from NEW
audience selections, but two reputation leaks remained on the LIST-BASED side:

1. iCloud/Apple gap. icloud.com/me.com/mac.com were never in the exclusion set.
   A 2026-06 Listmonk audit found 1,321 ENABLED iCloud subscribers on list 3
   ("FCC Carriers - Direct Contacts") -- the single largest enabled-consumer
   bucket -- being cold-blasted with no exclusion at all. Add APPLE_CONSUMER_DOMAINS.

2. Stale already-imported consumer subs. List-based campaigns (e.g. the running
   CRTC/USF blast on list 3) keep hitting consumer addresses imported BEFORE the
   relevant domain joined the exclusion list. gmail.com was still the #1 bounce
   domain via that campaign even though new selections exclude it. Add
   scrub_listmonk_consumer.py: reconciles the live Listmonk subscriber table
   against the authoritative exclusion list and blocklists any ENABLED subscriber
   whose address is_blocked(). Idempotent; re-run whenever the exclusion grows so
   it applies retroactively. Uses the same 'blocklisted' terminal state as the
   bounce handler, so contacts are excluded from all current/future campaigns
   without deleting history. Supports --dry-run and both listmonk / listmonk_hc.
2026-06-18 23:55:58 -05:00

134 lines
7 KiB
Python

"""Shared recipient-domain exclusions for outbound cold-email campaigns.
We self-host our MTA (transactional relays like SES forbid cold email), so we
must protect our sending-IP reputation manually. The two biggest levers:
1. NOT mailing the Yahoo/Verizon-Media family: those providers aggressively
defer cold senders with "unexpected volume / user complaints" 421
responses, which poisons the IP for every other provider too.
2. NOT mailing Google CONSUMER mailboxes (gmail.com etc.) from a cold/warming
IP: Google hard-rejects them with 550-5.7.1 "this message is likely
unsolicited mail", and those rejections are reputation-damaging. (On
2026-06-08 a warmup audit found gmail.com alone was 77% of our 550-5.7.1
blocks -- 427 of 556.) Custom domains hosted on Google Workspace are a
smaller, MX-only signal handled separately in the per-vertical builders.
Keep this list authoritative and import it everywhere we build audiences.
"""
from __future__ import annotations
# Consumer mailbox domains that are all operated by Yahoo / Verizon Media.
# Legacy AT&T and Frontier consumer mail was handed off to Yahoo's
# infrastructure as well, so those brands belong to the same family.
YAHOO_FAMILY_DOMAINS: frozenset[str] = frozenset({
    # -- Yahoo / AOL core --
    "yahoo.com",
    "yahoo.com.mx",
    "yahoo.es",
    "yahoo.it",
    "yahoo.ca",
    "myyahoo.com",
    "ymail.com",
    "rocketmail.com",
    "aol.com",
    "aol.com.mx",
    "aim.com",
    "love.com",
    "games.com",
    "wow.com",
    "netscape.net",
    "netscape.com",
    "cs.com",
    "compuserve.com",
    # -- AT&T family (Yahoo-hosted) --
    "att.net",
    "sbcglobal.net",
    "bellsouth.net",
    "pacbell.net",
    "ameritech.net",
    "swbell.net",
    "snet.net",
    "flash.net",
    "prodigy.net",
    "wans.net",
    "nvbell.net",
    # -- Verizon family (Yahoo-hosted) --
    "verizon.net",
    "verizongni.com",
    "bellatlantic.net",
    # -- Frontier (Yahoo-hosted) --
    "frontier.com",
    "frontiernet.net",
})
# Google consumer mailboxes. Of the big providers, Google's cold-IP spam
# filter (550-5.7.1) is the strictest, and consumer gmail accounts have the
# highest complaint sensitivity, so these are held out of cold/warmup sends.
# This is only the domain-string layer: custom domains that silently run on
# Google Workspace need an MX lookup and are handled in the per-vertical
# builders (e.g. the healthcare mx_provider flag).
GOOGLE_CONSUMER_DOMAINS: frozenset[str] = frozenset({
    "gmail.com",
    "googlemail.com",
})
# Microsoft consumer mailboxes (Outlook.com / Hotmail). Microsoft's cold-IP
# filtering (SmartScreen) tends to route cold B2B mail silently to Junk or
# soft-defer it instead of hard-bouncing. That makes it less visible than
# Google's 550-5.7.1, but it is still a reputation drag on a warming IP (low
# engagement, spam-folder placement). These are consumer mailboxes rather than
# real B2B carrier contacts, so they are held out of cold/warmup sends like
# the other consumer providers.
MICROSOFT_CONSUMER_DOMAINS: frozenset[str] = frozenset({
    "hotmail.com",
    "outlook.com",
    "live.com",
    "msn.com",
    "hotmail.co.uk",
    "hotmail.fr",
    "live.co.uk",
    "outlook.es",
    "passport.com",
    "windowslive.com",
})
# Apple consumer mailboxes (iCloud Mail / legacy .Mac / MobileMe). Apple is a
# pure-consumer provider: there is no Apple "Workspace" tenant a real B2B
# carrier would run its company mail on, so each of these is a personal inbox
# and not a business contact. iCloud filters cold senders aggressively (silent
# Junk routing plus 5xx rejects for reputation-poor senders), and a 2026
# Listmonk audit found iCloud to be the single largest enabled-consumer bucket
# leaking into list-based campaigns (1,321 enabled subs on list 3 alone).
# Held out of cold/warmup sends like the other consumer providers.
APPLE_CONSUMER_DOMAINS: frozenset[str] = frozenset({
    "icloud.com",
    "me.com",
    "mac.com",
})
# Legal / complaint do-not-contact list. Anything listed here must NEVER be
# cold-mailed or re-imported, regardless of the consumer-domain reputation
# rules. Add a whole domain or a single address whenever we are honoring a
# formal do-not-contact / opt-out demand (e.g. a regulator complaint).
#
# Entries:
#   dataspindle.com / dave@dataspindle.com -- David Sgro, PA OAG complaint
#   BCP-26-05-025816; opted out 2026-04-13, permanently suppressed.
DO_NOT_CONTACT_DOMAINS: frozenset[str] = frozenset({
    "dataspindle.com",
})

DO_NOT_CONTACT_EMAILS: frozenset[str] = frozenset({
    "dave@dataspindle.com",
})
# Defunct / legacy / satellite ISP mailbox domains. Cold-mailing them is pure
# reputation drag. Either the mailboxes are overwhelmingly dead (the brand was
# shut down or absorbed years ago and the addresses now hard-bounce), or the
# operator (satellite / small rural ISP) aggressively defers cold B2B mail and
# eventual delivery is poor. The list comes from our own Listmonk bounce table
# (top bounced recipient domains), cross-checked against ISP status.
#
# NOTE: still-active large consumer ISPs (comcast.net, charter.net, cox.net,
# centurylink.net) are deliberately NOT listed. Their bounces were caused by
# the cold-IP/no-DKIM reputation problem (now fixed), not by dead mailboxes,
# and they carry real prospects.
DEAD_ISP_DOMAINS: frozenset[str] = frozenset({
    # -- Defunct dial-up / early-ISP brands (mail shut down or vestigial) --
    "earthlink.net",
    "peoplepc.com",
    "mindspring.com",
    "netzero.net",
    "netzero.com",
    "juno.com",
    "excite.com",
    "lycos.com",
    "wmconnect.com",
    "adelphia.net",
    "voyager.net",
    "core.com",
    "localnet.com",
    "pldi.net",
    "ptsi.net",
    "cablespeed.com",
    # -- CenturyLink / Qwest / Embarq legacy brands (migrated/abandoned) --
    "qwest.net",
    "qwestoffice.net",
    "embarqmail.com",
    "centurytel.net",
    "citlink.net",
    "citynet.net",
    # -- Satellite (poor cold-mail deliverability, high defer/bounce) --
    "hughes.net",
    "wildblue.net",
    "dishmail.net",
    "wildblueinternet.net",
    # -- Altice / Optimum / Suddenlink / Cablevision family
    #    (rural, aggressive defer) --
    "optonline.net",
    "suddenlink.net",
    "cebridge.net",
    "bresnan.net",
    # -- WOW! / Knology, Mediacom, Insight, Atlantic Broadband/Breezeline,
    #    Cable One --
    "wowway.com",
    "knology.net",
    "mchsi.com",
    "insightbb.com",
    "atlanticbb.net",
    "breezeline.net",
    "cableone.net",
    "cableone.com",
    # -- Small / rural regional ISPs
    #    (aggressive defer, low cold deliverability) --
    "windstream.net",
    "tds.net",
    "iowatelecom.net",
    "netins.net",
    "mhtc.net",
    "arvig.net",
    "consolidated.net",
    "fuse.net",
    "ncn.net",
    "new.rr.com",
    # -- Alaska regional (satellite/long-haul, poor cold deliverability) --
    "gci.net",
    "alaska.net",
    "acsalaska.net",
    "gulftel.com",
})
# The full set of recipient domains we refuse to cold-mail: the union of every
# per-provider consumer set above plus the dead-ISP and do-not-contact domain
# sets. Extend the component sets (or add a new one here) as we discover other
# reputation-sensitive providers.
BLOCKED_EMAIL_DOMAINS: frozenset[str] = frozenset().union(
    YAHOO_FAMILY_DOMAINS,
    GOOGLE_CONSUMER_DOMAINS,
    MICROSOFT_CONSUMER_DOMAINS,
    APPLE_CONSUMER_DOMAINS,
    DEAD_ISP_DOMAINS,
    DO_NOT_CONTACT_DOMAINS,
)
def domain_of(email: str) -> str:
    """Return the normalized domain part of an email, or '' if malformed.

    The domain is everything after the LAST ``@``, lowercased, with
    surrounding whitespace removed. Two decorations that would otherwise make
    a blocked domain compare unequal to its blocklist entry are also removed:

    * a closing angle bracket left over from ``Name <user@example.com>``
    * the trailing dot of a fully-qualified name (``user@example.com.``)

    Args:
        email: The address to inspect. ``None`` and other non-string values
            are treated as malformed instead of raising.

    Returns:
        The lowercased domain, or ``''`` when *email* is not a string or
        contains no ``@``.
    """
    # Non-strings (None, NaN from a missing cell, ...) have no domain.
    if not isinstance(email, str) or "@" not in email:
        return ""
    domain = email.rsplit("@", 1)[-1].strip().lower()
    # Neither '>' nor a trailing '.' can be part of a real hostname label, so
    # dropping them only ever turns a near-miss into the canonical domain.
    return domain.rstrip(">").rstrip().rstrip(".")
def is_blocked(email: str) -> bool:
    """Return True if *email* must be held out of cold/warmup sends.

    An address is blocked when, after trimming whitespace and lowercasing,
    it is listed verbatim in ``DO_NOT_CONTACT_EMAILS`` or its domain (as
    returned by ``domain_of``) is in ``BLOCKED_EMAIL_DOMAINS``.

    Args:
        email: The recipient address. ``None``, the empty string and
            non-string values (e.g. a float NaN standing in for a missing
            cell) are accepted and reported as not blocked, because there is
            no address to match.

    Returns:
        ``True`` when the address or its domain is on an exclusion list,
        ``False`` otherwise.
    """
    # A truthy non-string would slip past an `email or ""` guard and then
    # fail on .strip(); treat anything that is not a string as "no address".
    if not isinstance(email, str):
        return False
    normalized = email.strip().lower()
    if normalized in DO_NOT_CONTACT_EMAILS:
        return True
    return domain_of(normalized) in BLOCKED_EMAIL_DOMAINS