diff --git a/infra/cron/pw-mx-tag b/infra/cron/pw-mx-tag new file mode 100644 index 0000000..7d90a3a --- /dev/null +++ b/infra/cron/pw-mx-tag @@ -0,0 +1,19 @@ +# Daily MX tagging for the FMCSA carrier audience. Resolves the MX records of +# carrier email domains that don't yet have an mx_provider and stores the +# receiving operator (google/microsoft/proofpoint/... or an "mx:" label for +# everything else). The trucking campaign builder uses mx_provider to EXCLUDE the +# big + consumer mailbox operators during warmup (Google/MS/Yahoo/iCloud/... all +# throttle or complaint-block a cold IP) and to per-operator-throttle the rest. +# +# WHY a cron: mx_provider was previously only tagged by hand, so the untagged +# (NULL) backlog never drained (~316k sendable carriers on 2026-06-20) and every +# new FMCSA census import landed untagged. Untagged carriers are KEPT in the warmup +# pool (anti-starvation), so an untagged Google/Yahoo domain can slip the exclusion +# until it's tagged. Running daily keeps the audience classified so the warmup +# exclusion stays effective. Idempotent + bounded: only resolves domains where +# mx_provider IS NULL, capped at --limit-domains/run. +# +# Runs 05:45 UTC, before the 06:10 reputation monitor / 06:20 DMARC / 06:30 scrub +# and well before the 08:00 trucking builder, so the day's send sees fresh tags. +# --only-unsent prioritizes carriers the builder will actually mail. +45 5 * * * deploy cd /opt/performancewest && docker compose exec -T workers python3 -m scripts.mx_tag_carriers --only-unsent --limit-domains 20000 >> /var/log/pw-mx-tag.log 2>&1 diff --git a/scripts/build_trucking_campaigns.py b/scripts/build_trucking_campaigns.py index cc6b14c..e1ec7e3 100644 --- a/scripts/build_trucking_campaigns.py +++ b/scripts/build_trucking_campaigns.py @@ -189,8 +189,25 @@ _COUPON_ALPHABET = "ABCDEFGHJKLMNPQRSTUVWXYZ" # no I/O to avoid confusion # Set MAIN_SKIP_BIG_MX=0 to stop excluding once truly warmed up. MAIN_SKIP_BIG_MX = os.getenv("MAIN_SKIP_BIG_MX", "1") not in ("0", "false", "") # Operators to hold out during warmup (they aggressively throttle/blocklist). +# These are the "clean label" providers mx_tag_carriers.py recognizes by MX host. BIG_MX_OPERATORS = ("google", "microsoft", "proofpoint", "mimecast", "barracuda", "cisco", "broadcom") +# Consumer / aggressively-filtering mailbox operators that mx_tag_carriers.py +# labels with the "mx:" prefix (no clean label). They complaint-block and filter +# like the big operators, so hold them out of the warmup pool too. The literal- +# domain blocklist (BLOCKED_EMAIL_DOMAINS) already stops someone@yahoo.com / +# @icloud.com, but a CUSTOM domain whose MX points at Yahoo Small Business / AOL +# (mx:yahoodns.net), Apple iCloud+ Custom Domain (mx:icloud.com), or a legacy +# consumer ISP is invisible to that string layer -- only the MX tag catches it. +# (Live 2026-06-20: mx:yahoodns.net alone = 283k sendable carriers.) +CONSUMER_MX_OPERATORS = ( + "mx:yahoodns.net", "mx:icloud.com", "mx:comcast.net", "mx:charter.net", + "mx:centurylink.net", "mx:windstream.net", "mx:tds.net", + "mx:earthlink-vadesecure.net", +) +# Everything held out of the warmup pool entirely until MAIN_BIG_MX_EXCLUDE_UNTIL_DAY, +# then re-introduced gradually via mx_daily_caps(). +WARMUP_EXCLUDE_OPERATORS = BIG_MX_OPERATORS + CONSUMER_MX_OPERATORS MAIN_WARMUP_START_FILE = os.getenv("MTA_WARMUP_START_FILE", "/etc/postfix/pw-warmup-start") # How many days to EXCLUDE the big operators entirely. The Jun 13-14 block storm # means reputation is NOT yet established despite a high calendar day count, so we @@ -208,9 +225,10 @@ def main_warmup_day() -> int: def mx_daily_caps(day: int) -> dict: - """Per-operator daily NEW-recipient caps. Big operators are EXCLUDED entirely - until MAIN_BIG_MX_EXCLUDE_UNTIL_DAY (reputation recovery), then re-introduced - gradually. 'default' is the per-operator cap for the long tail.""" + """Per-operator daily NEW-recipient caps. Big + consumer-MX operators are + EXCLUDED entirely until MAIN_BIG_MX_EXCLUDE_UNTIL_DAY (reputation recovery), + then re-introduced gradually. 'default' is the per-operator cap for the long + tail of small/self-hosted systems that carry the warmup volume.""" if day <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY: big, default = 0, 120 # big OFF; long-tail operators carry volume elif day <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY + 7: @@ -219,7 +237,8 @@ def mx_daily_caps(day: int) -> dict: big, default = 120, 200 else: big, default = 300, 250 - caps = {op: big for op in BIG_MX_OPERATORS} + # Both big-label and consumer-mx operators ramp together on the same schedule. + caps = {op: big for op in WARMUP_EXCLUDE_OPERATORS} caps["__default__"] = default return caps @@ -923,13 +942,14 @@ def fetch_carriers( target_state_params = [] # During warmup, exclude carriers on the big operators that throttle/blocklist - # (Google, Microsoft, etc.) -- mx_provider is set by mx_tag_carriers.py. - # Untagged carriers (mx_provider IS NULL) are kept: the per-MX throttle in the - # selector still bounds them, and excluding NULLs would starve the pool until - # tagging completes. + # (Google, Microsoft, etc.) AND the consumer mailbox operators behind the + # "mx:" prefix (Yahoo Small Business, iCloud custom domains, legacy ISPs) -- + # mx_provider is set by mx_tag_carriers.py. Untagged carriers (mx_provider IS + # NULL) are kept: the per-MX throttle in the selector still bounds them, and + # excluding NULLs would starve the pool until tagging completes. big_mx_exclude = "" if MAIN_SKIP_BIG_MX and main_warmup_day() <= MAIN_BIG_MX_EXCLUDE_UNTIL_DAY: - ops = ",".join("'%s'" % o for o in BIG_MX_OPERATORS) + ops = ",".join("'%s'" % o for o in WARMUP_EXCLUDE_OPERATORS) big_mx_exclude = f"AND (mx_provider IS NULL OR mx_provider NOT IN ({ops}))" cur.execute(f"""