#!/usr/bin/env python3
"""Clean the healthcare warmup list before sending.

Keep only deliverable addresses so the warming HC IPs (.107-.109) build
reputation on good mail.

Keeps rows whose verify_reason is one of ``keep_reasons`` (smtp_valid,
catch_all_detected, and the literal value "Y" -- NOTE(review): "Y" looks
like a legacy "verified" flag; confirm with the list producer).  Every other
reason is dropped, e.g. mx_unreachable and smtp_unknown_451/541/554 (these
defer/bounce and hurt warmup reputation).  Sorts smtp_valid first so the
daily slice hits verified mailboxes first.

Reads SRC, writes OUT (then swap OUT over the cron's
hc_warmup_nongoogle.csv).  Parses with the csv module (the name column
contains commas inside quotes, so naive comma-splitting misparses
verify_reason).
"""
import csv
from collections import Counter
from typing import Iterable, List, Tuple

SRC = "/opt/performancewest/data/hc_warmup_nongoogle.csv"
OUT = "/opt/performancewest/data/hc_warmup_nongoogle_clean.csv"

# Column holding the verification verdict for each address.
REASON_COLUMN = "verify_reason"

# Verdicts considered deliverable; anything else is dropped.
keep_reasons = {"smtp_valid", "catch_all_detected", "Y"}

# Output ordering: lower rank is written (and therefore sent) first.
order = {"smtp_valid": 0, "catch_all_detected": 1, "Y": 2}


def _reason(row: dict) -> str:
    """Return the row's verify_reason, stripped; "" when missing or None."""
    # DictReader fills short rows with None, hence the ``or ""``.
    return (row.get(REASON_COLUMN) or "").strip()


def select_deliverable(rows: Iterable[dict]) -> List[dict]:
    """Return the rows worth sending to, best-verified first.

    Args:
        rows: Parsed CSV rows (dicts keyed by column name).

    Returns:
        A new list with only rows whose verify_reason is in ``keep_reasons``,
        ordered by ``order``.  The sort is stable, so rows sharing a reason
        keep their original relative order.
    """
    kept = [row for row in rows if _reason(row) in keep_reasons]
    # 9 is a fallback rank for a kept reason that has no entry in ``order``.
    kept.sort(key=lambda row: order.get(_reason(row), 9))
    return kept


def clean(src: str = SRC, out: str = OUT) -> Tuple[int, int]:
    """Filter ``src`` into ``out`` and print a summary.

    Args:
        src: Path of the CSV to read; must have a header row that includes
            the verify_reason column.
        out: Path of the cleaned CSV to write (same columns as ``src``).

    Returns:
        ``(total_rows_read, rows_kept)``.

    Raises:
        ValueError: If ``src`` has no header row or no verify_reason column.
            Raised before ``out`` is opened, so a bad input never truncates
            or empties the output file.
    """
    # Explicit encoding: the locale default may differ when run from cron.
    with open(src, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        cols = reader.fieldnames
        allrows = list(reader)

    if not cols:
        raise ValueError(f"{src}: no header row; nothing written")
    if REASON_COLUMN not in cols:
        raise ValueError(
            f"{src}: missing {REASON_COLUMN!r} column; nothing written"
        )

    total = len(allrows)
    print("verify_reason counts:", dict(Counter(_reason(row) for row in allrows)))

    kept = select_deliverable(allrows)

    with open(out, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=cols)
        writer.writeheader()
        writer.writerows(kept)

    print(f"total={total} kept={len(kept)} dropped={total - len(kept)}")
    print("kept breakdown:", dict(Counter(_reason(row) for row in kept)))
    print("wrote", out)
    return total, len(kept)


def main() -> None:
    """Run the cleaner on the configured SRC/OUT paths."""
    clean(SRC, OUT)


if __name__ == "__main__":
    main()