From 85dc3d5c3b56daa8aeab50480117c78408958ede Mon Sep 17 00:00:00 2001 From: justin Date: Mon, 8 Jun 2026 03:13:00 -0500 Subject: [PATCH] hc refresh: propagate fresh status into the channel CSVs the cron reads The channel CSVs (hc_warmup_nongoogle/google/week1_verified) are email-keyed subsets of the master with extra deliverability columns (verify_ok/verify_reason). The refresh now writes the fresh status fields (reval_due_date, days_overdue, reval_status, leie_excluded, optout_ending, name/specialty/state) back into each, preserving the extra columns and row membership, so a single weekly run updates everything the campaign cron consumes -- not just the master. --- scripts/hc_data_refresh.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/scripts/hc_data_refresh.py b/scripts/hc_data_refresh.py index 03f1041..17150eb 100644 --- a/scripts/hc_data_refresh.py +++ b/scripts/hc_data_refresh.py @@ -268,9 +268,35 @@ def main() -> int: write_atomic(args.master, refreshed, HEADER) log(f"wrote {args.master} ({len(refreshed)} rows)") - # Re-derive the channel CSVs the campaign cron reads (Google vs non-Google - # split is a deliverability concern, not a segment one; keep the existing - # split if those files exist so we don't lose warmup-cohort separation). + # Propagate the fresh status fields into the channel CSVs the campaign cron + # actually reads. These are email-keyed subsets of the master with extra + # deliverability columns (verify_ok/verify_reason) we must preserve; we only + # overwrite the status fields the refresh owns. + REFRESHED_FIELDS = ["reval_due_date", "days_overdue", "reval_status", + "leie_excluded", "optout_ending", "name", "specialty", "state"] + by_email = {r["email"].strip().lower(): r for r in refreshed if r.get("email")} + channel_csvs = [os.path.join(args.out_dir, f) for f in + ("hc_warmup_nongoogle.csv", "hc_warmup_google.csv", + "hc_warmup_week1_verified.csv")] + for path in channel_csvs: + if not os.path.exists(path): + continue + with open(path, newline="") as f: + rdr = csv.DictReader(f) + cols = rdr.fieldnames or [] + rows_ch = list(rdr) + updated = 0 + for r in rows_ch: + m = by_email.get(r.get("email", "").strip().lower()) + if not m: + continue + for fld in REFRESHED_FIELDS: + if fld in cols and fld in m: + r[fld] = m[fld] + updated += 1 + write_atomic(path, rows_ch, cols) + log(f"propagated to {os.path.basename(path)}: {updated}/{len(rows_ch)} rows updated") + return 0