mail: close MX-exclusion gaps — exclude consumer mx: operators + add mx-tag cron
Fix 1 (build_trucking_campaigns.py): the warmup big-MX exclusion only covered the clean-label operators (google/microsoft/proofpoint/...). Consumer mailbox operators that mx_tag_carriers.py labels with an "mx:" prefix slipped BOTH the exclusion and the per-MX throttle -- notably mx:yahoodns.net (283k sendable carriers = Yahoo Small Business/AOL custom domains) and mx:icloud.com (25k), plus comcast/charter/centurylink/windstream/tds/earthlink. These are custom domains whose MX points at a consumer provider, invisible to the literal-domain blocklist. Added CONSUMER_MX_OPERATORS, folded into WARMUP_EXCLUDE_OPERATORS used by both the fetch_carriers() exclusion SQL and mx_daily_caps() (same day-30 ramp). Behind the existing MAIN_SKIP_BIG_MX switch. Validated read-only: after the fix the warmup-eligible pool is 353,909 carriers (315,892 untagged + ~38k genuinely small/self-hosted operators), so the long tail still sustains the daily quota -- not starved -- while 0 consumer-MX carriers are selected during warmup. Fix 3 (infra/cron/pw-mx-tag): mx_tag_carriers.py was on no cron, so the untagged (NULL) backlog (~316k) never drained and new FMCSA imports stayed untagged, slowly re-opening the gap. Added a daily 05:45 UTC cron (--only-unsent --limit-domains 20000), before the 08:00 builder. Idempotent/bounded (only tags mx_provider IS NULL). Verified live: a 200-domain test run tagged 216 domains. (Fix 2 -- bounding the NULL bucket cap -- deferred; the cron will drain it.)
This commit is contained in:
parent
285a4a087c
commit
9eeed47c4b
2 changed files with 48 additions and 9 deletions
19
infra/cron/pw-mx-tag
Normal file
19
infra/cron/pw-mx-tag
Normal file
|
|
@@ -0,0 +1,19 @@
|
||||||
|
# Daily MX tagging for the FMCSA carrier audience. Resolves the MX records of
|
||||||
|
# carrier email domains that don't yet have an mx_provider and stores the
|
||||||
|
# receiving operator (google/microsoft/proofpoint/... or an "mx:<root>" label for
|
||||||
|
# everything else). The trucking campaign builder uses mx_provider to EXCLUDE the
|
||||||
|
# big + consumer mailbox operators during warmup (Google/MS/Yahoo/iCloud/... all
|
||||||
|
# throttle or complaint-block a cold IP) and to per-operator-throttle the rest.
|
||||||
|
#
|
||||||
|
# WHY a cron: mx_provider was previously only tagged by hand, so the untagged
|
||||||
|
# (NULL) backlog never drained (~316k sendable carriers on 2026-06-20) and every
|
||||||
|
# new FMCSA census import landed untagged. Untagged carriers are KEPT in the warmup
|
||||||
|
# pool (anti-starvation), so an untagged Google/Yahoo domain can slip the exclusion
|
||||||
|
# until it's tagged. Running daily keeps the audience classified so the warmup
|
||||||
|
# exclusion stays effective. Idempotent + bounded: only resolves domains where
|
||||||
|
# mx_provider IS NULL, capped at --limit-domains/run.
|
||||||
|
#
|
||||||
|
# Runs 05:45 UTC, before the 06:10 reputation monitor / 06:20 DMARC / 06:30 scrub
|
||||||
|
# and well before the 08:00 trucking builder, so the day's send sees fresh tags.
|
||||||
|
# --only-unsent prioritizes carriers the builder will actually mail.
|
||||||
|
45 5 * * * deploy cd /opt/performancewest && docker compose exec -T workers python3 -m scripts.mx_tag_carriers --only-unsent --limit-domains 20000 >> /var/log/pw-mx-tag.log 2>&1
|
||||||
|
|
@@ -189,8 +189,25 @@ _COUPON_ALPHABET = "ABCDEFGHJKLMNPQRSTUVWXYZ" # no I/O to avoid confusion
|
||||||
# Set MAIN_SKIP_BIG_MX=0 to stop excluding once truly warmed up.
|
# Set MAIN_SKIP_BIG_MX=0 to stop excluding once truly warmed up.
|
||||||
MAIN_SKIP_BIG_MX = os.getenv("MAIN_SKIP_BIG_MX", "1") not in ("0", "false", "")
|
MAIN_SKIP_BIG_MX = os.getenv("MAIN_SKIP_BIG_MX", "1") not in ("0", "false", "")
|
||||||
# Operators to hold out during warmup (they aggressively throttle/blocklist).
|
# Operators to hold out during warmup (they aggressively throttle/blocklist).
|
||||||
|
# These are the "clean label" providers mx_tag_carriers.py recognizes by MX host.
|
||||||
BIG_MX_OPERATORS = ("google", "microsoft", "proofpoint", "mimecast",
|
BIG_MX_OPERATORS = ("google", "microsoft", "proofpoint", "mimecast",
|
||||||
"barracuda", "cisco", "broadcom")
|
"barracuda", "cisco", "broadcom")
|
||||||
|
# Consumer / aggressively-filtering mailbox operators that mx_tag_carriers.py
|
||||||
|
# labels with the "mx:" prefix (no clean label). They complaint-block and filter
|
||||||
|
# like the big operators, so hold them out of the warmup pool too. The literal-
|
||||||
|
# domain blocklist (BLOCKED_EMAIL_DOMAINS) already stops someone@yahoo.com /
|
||||||
|
# @icloud.com, but a CUSTOM domain whose MX points at Yahoo Small Business / AOL
|
||||||
|
# (mx:yahoodns.net), Apple iCloud+ Custom Domain (mx:icloud.com), or a legacy
|
||||||
|
# consumer ISP is invisible to that string layer -- only the MX tag catches it.
|
||||||
|
# (Live 2026-06-20: mx:yahoodns.net alone = 283k sendable carriers.)
|
||||||
|
CONSUMER_MX_OPERATORS = (
|
||||||
|
"mx:yahoodns.net", "mx:icloud.com", "mx:comcast.net", "mx:charter.net",
|
||||||
|
"mx:centurylink.net", "mx:windstream.net", "mx:tds.net",
|
||||||
|
"mx:earthlink-vadesecure.net",
|
||||||
|
)
|
||||||
|
# Everything held out of the warmup pool entirely until MAIN_BIG_MX_EXCLUDE_UNTIL_DAY,
|
||||||
|
# then re-introduced gradually via mx_daily_caps().
|
||||||
|
WARMUP_EXCLUDE_OPERATORS = BIG_MX_OPERATORS + CONSUMER_MX_OPERATORS
|
||||||
MAIN_WARMUP_START_FILE = os.getenv("MTA_WARMUP_START_FILE", "/etc/postfix/pw-warmup-start")
|
MAIN_WARMUP_START_FILE = os.getenv("MTA_WARMUP_START_FILE", "/etc/postfix/pw-warmup-start")
|
||||||
# How many days to EXCLUDE the big operators entirely. The Jun 13-14 block storm
|
# How many days to EXCLUDE the big operators entirely. The Jun 13-14 block storm
|
||||||
# means reputation is NOT yet established despite a high calendar day count, so we
|
# means reputation is NOT yet established despite a high calendar day count, so we
|
||||||
|
|
@@ -208,9 +225,10 @@ def main_warmup_day() -> int:
|
||||||
|
|
||||||
|
|
||||||
def mx_daily_caps(day: int) -> dict:
|
def mx_daily_caps(day: int) -> dict:
|
||||||
"""Per-operator daily NEW-recipient caps. Big operators are EXCLUDED entirely
|
"""Per-operator daily NEW-recipient caps. Big + consumer-MX operators are
|
||||||
until MAIN_BIG_MX_EXCLUDE_UNTIL_DAY (reputation recovery), then re-introduced
|
EXCLUDED entirely until MAIN_BIG_MX_EXCLUDE_UNTIL_DAY (reputation recovery),
|
||||||
gradually. 'default' is the per-operator cap for the long tail."""
|
then re-introduced gradually. 'default' is the per-operator cap for the long
|
||||||
|
tail of small/self-hosted systems that carry the warmup volume."""
|
||||||
if day <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY:
|
if day <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY:
|
||||||
big, default = 0, 120 # big OFF; long-tail operators carry volume
|
big, default = 0, 120 # big OFF; long-tail operators carry volume
|
||||||
elif day <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY + 7:
|
elif day <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY + 7:
|
||||||
|
|
@@ -219,7 +237,8 @@ def mx_daily_caps(day: int) -> dict:
|
||||||
big, default = 120, 200
|
big, default = 120, 200
|
||||||
else:
|
else:
|
||||||
big, default = 300, 250
|
big, default = 300, 250
|
||||||
caps = {op: big for op in BIG_MX_OPERATORS}
|
# Both big-label and consumer-mx operators ramp together on the same schedule.
|
||||||
|
caps = {op: big for op in WARMUP_EXCLUDE_OPERATORS}
|
||||||
caps["__default__"] = default
|
caps["__default__"] = default
|
||||||
return caps
|
return caps
|
||||||
|
|
||||||
|
|
@@ -923,13 +942,14 @@ def fetch_carriers(
|
||||||
target_state_params = []
|
target_state_params = []
|
||||||
|
|
||||||
# During warmup, exclude carriers on the big operators that throttle/blocklist
|
# During warmup, exclude carriers on the big operators that throttle/blocklist
|
||||||
# (Google, Microsoft, etc.) -- mx_provider is set by mx_tag_carriers.py.
|
# (Google, Microsoft, etc.) AND the consumer mailbox operators behind the
|
||||||
# Untagged carriers (mx_provider IS NULL) are kept: the per-MX throttle in the
|
# "mx:" prefix (Yahoo Small Business, iCloud custom domains, legacy ISPs) --
|
||||||
# selector still bounds them, and excluding NULLs would starve the pool until
|
# mx_provider is set by mx_tag_carriers.py. Untagged carriers (mx_provider IS
|
||||||
# tagging completes.
|
# NULL) are kept: the per-MX throttle in the selector still bounds them, and
|
||||||
|
# excluding NULLs would starve the pool until tagging completes.
|
||||||
big_mx_exclude = ""
|
big_mx_exclude = ""
|
||||||
if MAIN_SKIP_BIG_MX and main_warmup_day() <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY:
|
if MAIN_SKIP_BIG_MX and main_warmup_day() <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY:
|
||||||
ops = ",".join("'%s'" % o for o in BIG_MX_OPERATORS)
|
ops = ",".join("'%s'" % o for o in WARMUP_EXCLUDE_OPERATORS)
|
||||||
big_mx_exclude = f"AND (mx_provider IS NULL OR mx_provider NOT IN ({ops}))"
|
big_mx_exclude = f"AND (mx_provider IS NULL OR mx_provider NOT IN ({ops}))"
|
||||||
|
|
||||||
cur.execute(f"""
|
cur.execute(f"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue