diff --git a/scripts/hc_data_refresh.py b/scripts/hc_data_refresh.py index 03f1041..17150eb 100644 --- a/scripts/hc_data_refresh.py +++ b/scripts/hc_data_refresh.py @@ -268,9 +268,35 @@ def main() -> int: write_atomic(args.master, refreshed, HEADER) log(f"wrote {args.master} ({len(refreshed)} rows)") - # Re-derive the channel CSVs the campaign cron reads (Google vs non-Google - # split is a deliverability concern, not a segment one; keep the existing - # split if those files exist so we don't lose warmup-cohort separation). + # Propagate the fresh status fields into the channel CSVs the campaign cron + # actually reads. These are email-keyed subsets of the master with extra + # deliverability columns (verify_ok/verify_reason) we must preserve; we only + # overwrite the status fields the refresh owns. + REFRESHED_FIELDS = ["reval_due_date", "days_overdue", "reval_status", + "leie_excluded", "optout_ending", "name", "specialty", "state"] + by_email = {r["email"].strip().lower(): r for r in refreshed if r.get("email")} + channel_csvs = [os.path.join(args.out_dir, f) for f in + ("hc_warmup_nongoogle.csv", "hc_warmup_google.csv", + "hc_warmup_week1_verified.csv")] + for path in channel_csvs: + if not os.path.exists(path): + continue + with open(path, newline="") as f: + rdr = csv.DictReader(f) + cols = rdr.fieldnames or [] + rows_ch = list(rdr) + updated = 0 + for r in rows_ch: + m = by_email.get(r.get("email", "").strip().lower()) + if not m: + continue + for fld in REFRESHED_FIELDS: + if fld in cols and fld in m: + r[fld] = m[fld] + updated += 1 + write_atomic(path, rows_ch, cols) + log(f"propagated to {os.path.basename(path)}: {updated}/{len(rows_ch)} rows updated") + return 0