From a4d67bcf9b9a166cdd39d55af06c96d9d5292ec7 Mon Sep 17 00:00:00 2001
From: justin
Date: Sun, 7 Jun 2026 18:08:36 -0500
Subject: [PATCH] hc-warmup: add list-hygiene script (drop undeliverable addrs, smtp_valid first)

Keeps only deliverable addresses (smtp_valid + catch_all_detected), drops
mx_unreachable + smtp_unknown rejects that defer/bounce and damage the
warming HC IP reputation. Sorts smtp_valid first so the daily slice hits
verified mailboxes first. Used to clean hc_warmup_nongoogle.csv (501 -> 399 rows).
---
Review amendments: document the "Y" keep value, derive the keep-list from the
sort ranks, stop before touching OUT when SRC lacks a header row or the
verify_reason column, and wrap the script in main(). The index blob id below
is carried over from the original patch and was not recomputed.

 scripts/clean_hc_warmup_list.py | 66 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 scripts/clean_hc_warmup_list.py

diff --git a/scripts/clean_hc_warmup_list.py b/scripts/clean_hc_warmup_list.py
new file mode 100644
index 0000000..cd921a5
--- /dev/null
+++ b/scripts/clean_hc_warmup_list.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""Clean the healthcare warmup list before sending: keep only deliverable
+addresses so the warming HC IPs (.107-.109) build reputation on good mail.
+
+Keeps verify_reason in {smtp_valid, catch_all_detected, Y}; drops the rest,
+e.g. mx_unreachable and smtp_unknown_451/541/554 (these defer/bounce and hurt
+warmup reputation). Kept rows are sorted smtp_valid, then catch_all_detected,
+then Y, so the daily slice hits verified mailboxes first.
+NOTE(review): the meaning of the "Y" value is not recorded here -- confirm.
+
+Reads SRC, writes OUT (then swap OUT over the cron's hc_warmup_nongoogle.csv).
+Parses with the csv module (the name column contains commas inside quotes, so
+naive comma-splitting misparses verify_reason). Exits with an error, without
+writing OUT, when SRC has no header row or no verify_reason column.
+"""
+
+import csv
+import sys
+from collections import Counter
+
+SRC = "/opt/performancewest/data/hc_warmup_nongoogle.csv"
+OUT = "/opt/performancewest/data/hc_warmup_nongoogle_clean.csv"
+
+# Deliverable verify_reason values mapped to their sort rank (lower is sent
+# first). The keys double as the keep-list so the two cannot drift apart.
+KEEP_ORDER = {"smtp_valid": 0, "catch_all_detected": 1, "Y": 2}
+
+
+def _reason(row):
+    """Return the row's verify_reason, stripped; "" when missing or empty."""
+    return (row.get("verify_reason") or "").strip()
+
+
+def main():
+    """Filter SRC to deliverable rows, write them to OUT, print a summary."""
+    with open(SRC, newline="") as f:
+        reader = csv.DictReader(f)
+        cols = reader.fieldnames
+        allrows = list(reader)
+
+    # Without a header DictWriter would crash after truncating OUT, and without
+    # the column every row would be dropped; stop before touching OUT.
+    if not cols or "verify_reason" not in cols:
+        sys.exit(f"{SRC}: no header or verify_reason column; OUT not written")
+
+    total = len(allrows)
+    print("verify_reason counts:", dict(Counter(_reason(row) for row in allrows)))
+
+    # sorted() is stable, so rows keep their SRC order within each reason.
+    kept = sorted(
+        (row for row in allrows if _reason(row) in KEEP_ORDER),
+        key=lambda row: KEEP_ORDER[_reason(row)],
+    )
+
+    with open(OUT, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=cols)
+        writer.writeheader()
+        writer.writerows(kept)
+
+    print(f"total={total} kept={len(kept)} dropped={total - len(kept)}")
+    print("kept breakdown:", dict(Counter(_reason(row) for row in kept)))
+    print("wrote", OUT)
+
+
+if __name__ == "__main__":
+    main()