#!/usr/bin/env python3
"""Clean the healthcare warmup list before sending: keep only deliverable
addresses so the warming HC IPs (.107-.109) build reputation on good mail.

Keeps verify_reason in {smtp_valid, catch_all_detected, Y}; drops every other
value, e.g. mx_unreachable and smtp_unknown_451/541/554 (these defer/bounce and
hurt warmup reputation). Sorts smtp_valid first, then catch_all_detected, then
Y, so the daily slice hits verified mailboxes first.

Reads SRC, writes OUT (then swap OUT over the cron's hc_warmup_nongoogle.csv).
Parses with the csv module (the name column contains commas inside quotes, so
naive comma-splitting misparses verify_reason).
"""

import csv
from collections import Counter

SRC = "/opt/performancewest/data/hc_warmup_nongoogle.csv"
OUT = "/opt/performancewest/data/hc_warmup_nongoogle_clean.csv"

# Sort rank for every verify_reason value that is kept; lower ranks are written
# first. Any value not listed here is dropped from the output.
order = {"smtp_valid": 0, "catch_all_detected": 1, "Y": 2}
keep_reasons = set(order)


def _reason(row):
    """Return the row's verify_reason with surrounding whitespace removed.

    A missing or empty cell yields "" (csv.DictReader fills short rows with
    None, which is why the value is or-ed with "" before stripping).
    """
    return (row.get("verify_reason") or "").strip()


def select_deliverable(rows):
    """Return the rows whose verify_reason is in keep_reasons, ranked by order.

    list.sort is stable, so rows that share a reason keep their input order.
    """
    kept = [row for row in rows if _reason(row) in keep_reasons]
    kept.sort(key=lambda row: order.get(_reason(row), 9))
    return kept


def main(src=SRC, out=OUT):
    """Filter the CSV at src down to kept rows and write them to out.

    Prints the verify_reason histogram of the input, the kept/dropped totals,
    and the histogram of the kept rows.

    Raises:
        ValueError: if src has no header row or no verify_reason column. The
            check runs before out is opened, so a bad input never creates or
            truncates out.
    """
    with open(src, newline="") as f:
        reader = csv.DictReader(f)
        cols = reader.fieldnames  # None when the file is empty
        allrows = list(reader)

    # Without this column every row would be filtered out and a header-only
    # file written with no error at all; fail loudly instead.
    if not cols or "verify_reason" not in cols:
        raise ValueError(
            f"{src}: header {cols!r} has no verify_reason column; not writing {out}"
        )

    total = len(allrows)
    print("verify_reason counts:", dict(Counter(_reason(row) for row in allrows)))

    kept = select_deliverable(allrows)

    with open(out, "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        w.writerows(kept)

    print(f"total={total} kept={len(kept)} dropped={total - len(kept)}")
    print("kept breakdown:", dict(Counter(_reason(row) for row in kept)))
    print("wrote", out)


if __name__ == "__main__":
    main()