hc-warmup: add list-hygiene script (drop undeliverable addrs, smtp_valid first)
Keeps only deliverable addresses (verify_reason smtp_valid or catch_all_detected; the script's keep-list also accepts rows marked Y) and drops the mx_unreachable and smtp_unknown rejects, which defer or bounce and damage the reputation of the warming HC IPs. Sorts smtp_valid rows first so the daily slice hits verified mailboxes first. Used to clean hc_warmup_nongoogle.csv (501 -> 399 rows).
This commit is contained in:
parent
e5db147319
commit
a4d67bcf9b
1 changed files with 40 additions and 0 deletions
40
scripts/clean_hc_warmup_list.py
Normal file
40
scripts/clean_hc_warmup_list.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env python3
"""Clean the healthcare warmup list before sending: keep only deliverable
addresses so the warming HC IPs (.107-.109) build reputation on good mail.

Keeps verify_reason in {smtp_valid, catch_all_detected, Y}; drops every other
value, e.g. mx_unreachable and smtp_unknown_451/541/554 (these defer/bounce and
hurt warmup reputation). Sorts smtp_valid first so the daily slice hits
verified mailboxes first; rows sharing a reason keep their input order.

Reads SRC, writes OUT (then swap OUT over the cron's hc_warmup_nongoogle.csv).
Parses with the csv module (the name column contains commas inside quotes, so
naive comma-splitting misparses verify_reason).

Refuses to write OUT when the input has no verify_reason column: every row
would be dropped, and swapping that result in would empty the live list.
"""

import csv
from collections import Counter

SRC = "/opt/performancewest/data/hc_warmup_nongoogle.csv"
OUT = "/opt/performancewest/data/hc_warmup_nongoogle_clean.csv"

# Column holding the verification verdict for each address.
REASON_COLUMN = "verify_reason"

# verify_reason values that are allowed through to the cleaned list.
keep_reasons = {"smtp_valid", "catch_all_detected", "Y"}

# Sort rank per kept reason; lower ranks are written first.
order = {"smtp_valid": 0, "catch_all_detected": 1, "Y": 2}


def reason_of(row):
    """Return the row's verify_reason, stripped ("" when missing or empty)."""
    return (row.get(REASON_COLUMN) or "").strip()


def select_deliverable(rows):
    """Return the rows whose verify_reason is in keep_reasons, best first.

    Rows are ranked by ``order``; list.sort is stable, so rows with the same
    reason stay in their original relative order. The input is not modified.
    """
    kept = [row for row in rows if reason_of(row) in keep_reasons]
    # 9 is a fallback rank for a kept reason that has no entry in ``order``.
    kept.sort(key=lambda row: order.get(reason_of(row), 9))
    return kept


def main(src: str = SRC, out: str = OUT) -> list:
    """Filter the CSV at ``src`` into ``out`` and print a short summary.

    Args:
        src: path of the CSV to read; its header must include verify_reason.
        out: path of the cleaned CSV to write (same columns as ``src``).

    Returns:
        The list of kept rows, in the order they were written.

    Raises:
        ValueError: if ``src`` has no header or no verify_reason column.
            Raised before ``out`` is opened, so ``out`` is left untouched.
    """
    with open(src, newline="") as f:
        reader = csv.DictReader(f)
        cols = reader.fieldnames
        allrows = list(reader)

    if not cols or REASON_COLUMN not in cols:
        raise ValueError(
            f"{src}: no {REASON_COLUMN!r} column in header {cols!r}; "
            f"refusing to write {out}"
        )

    total = len(allrows)
    print("verify_reason counts:", dict(Counter(reason_of(row) for row in allrows)))

    kept = select_deliverable(allrows)

    with open(out, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        w.writerows(kept)

    print(f"total={total} kept={len(kept)} dropped={total - len(kept)}")
    print("kept breakdown:", dict(Counter(reason_of(row) for row in kept)))
    print("wrote", out)
    return kept


if __name__ == "__main__":
    main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue