Keeps only deliverable addresses (smtp_valid + catch_all_detected) and drops mx_unreachable + smtp_unknown rejects, which defer/bounce and damage the reputation of the warming HC IPs. Sorts smtp_valid first so the daily slice hits verified mailboxes first. Used to clean hc_warmup_nongoogle.csv (501 -> 399 rows).
40 lines
1.6 KiB
Python
40 lines
1.6 KiB
Python
#!/usr/bin/env python3
"""Clean the healthcare warmup list before sending: keep only deliverable
addresses so the warming HC IPs (.107-.109) build reputation on good mail.

Keeps verify_reason in {smtp_valid, catch_all_detected, Y}; drops everything
else, e.g. mx_unreachable and smtp_unknown_451/541/554 (these defer/bounce and
hurt warmup reputation). Sorts smtp_valid first, then catch_all_detected, then
Y, so the daily slice hits verified mailboxes first.

NOTE(review): "Y" is accepted by the code, but its meaning is not visible in
this file -- presumably a legacy "verified" flag; confirm it should stay.

Reads SRC, writes OUT (then swap OUT over the cron's hc_warmup_nongoogle.csv).
Parses with the csv module (the name column contains commas inside quotes, so
naive comma-splitting misparses verify_reason).
"""

import csv
from collections import Counter

SRC = "/opt/performancewest/data/hc_warmup_nongoogle.csv"
OUT = "/opt/performancewest/data/hc_warmup_nongoogle_clean.csv"

# Column that carries the verification verdict for each address.
REASON_COLUMN = "verify_reason"

# Sort priority of every reason we keep (lower sorts first). This is the single
# source of truth: the keep-set is derived from it so the two cannot drift.
order = {"smtp_valid": 0, "catch_all_detected": 1, "Y": 2}
keep_reasons = set(order)


def _reason(row):
    """Return the row's verify_reason with whitespace stripped.

    Yields "" when the column is absent or None (csv.DictReader fills short
    rows with None), so callers can compare and count without special cases.
    """
    return (row.get(REASON_COLUMN) or "").strip()


def select_deliverable(rows):
    """Return the rows worth sending to, most trustworthy first.

    Args:
        rows: iterable of dict rows, as produced by csv.DictReader.

    Returns:
        A new list holding only the rows whose verify_reason is in
        ``keep_reasons``, ordered by ``order``. The sort is stable, so rows
        sharing a reason keep their relative order from the input.
    """
    kept = [row for row in rows if _reason(row) in keep_reasons]
    # Every kept reason is a key of `order` by construction, so plain indexing
    # is safe here.
    kept.sort(key=lambda row: order[_reason(row)])
    return kept


def main(src=SRC, out=OUT):
    """Read ``src``, write the cleaned list to ``out`` and print a summary.

    Args:
        src: path of the CSV to clean.
        out: path the cleaned CSV is written to.

    Raises:
        SystemExit: if ``src`` has no header or no verify_reason column. In
            that case ``out`` is left untouched rather than being replaced by
            an empty list.
    """
    # Explicit UTF-8 keeps decoding independent of the locale the job runs
    # under; newline="" is what the csv module requires for quoted newlines.
    with open(src, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        cols = reader.fieldnames
        allrows = list(reader)

    # Without this check a missing column makes every reason "" and the script
    # would write a header-only file, which would empty the warmup list once
    # swapped into place.
    if not cols or REASON_COLUMN not in cols:
        raise SystemExit(
            f"{src}: no {REASON_COLUMN!r} column in header {cols!r}; "
            f"refusing to write {out}"
        )

    total = len(allrows)
    print("verify_reason counts:", dict(Counter(_reason(row) for row in allrows)))

    kept = select_deliverable(allrows)

    with open(out, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=cols)
        writer.writeheader()
        writer.writerows(kept)

    print(f"total={total} kept={len(kept)} dropped={total - len(kept)}")
    print("kept breakdown:", dict(Counter(_reason(row) for row in kept)))
    print("wrote", out)


if __name__ == "__main__":
    main()