From 85dc3d5c3b56daa8aeab50480117c78408958ede Mon Sep 17 00:00:00 2001
From: justin <justin@liquidator.optimal-reality.com>
Date: Mon, 8 Jun 2026 03:13:00 -0500
Subject: [PATCH] hc refresh: propagate fresh status into the channel CSVs the
 cron reads

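The channel CSVs (hc_warmup_nongoogle/google/week1_verified) are email-keyed
subsets of the master with extra deliverability columns (verify_ok/verify_reason).
The refresh now writes the fresh status fields (reval_due_date, days_overdue,
reval_status, leie_excluded, optout_ending, name/specialty/state) back into each,
preserving the extra columns and row membership, so a single weekly run updates
everything the campaign cron consumes -- not just the master.

Rows are matched to the master by whitespace-trimmed, lower-cased email. A field
is only overwritten when both the channel CSV and the master row have that
column; rows with no match in the master are left untouched, and channel CSVs
that do not exist are skipped.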
---
 scripts/hc_data_refresh.py | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/scripts/hc_data_refresh.py b/scripts/hc_data_refresh.py
index 03f1041..17150eb 100644
--- a/scripts/hc_data_refresh.py
+++ b/scripts/hc_data_refresh.py
@@ -268,9 +268,35 @@ def main() -> int:
     write_atomic(args.master, refreshed, HEADER)
     log(f"wrote {args.master} ({len(refreshed)} rows)")
 
-    # Re-derive the channel CSVs the campaign cron reads (Google vs non-Google
-    # split is a deliverability concern, not a segment one; keep the existing
-    # split if those files exist so we don't lose warmup-cohort separation).
+    # Propagate the fresh status fields into the channel CSVs the campaign cron
+    # actually reads. These are email-keyed subsets of the master with extra
+    # deliverability columns (verify_ok/verify_reason) we must preserve; we only
+    # overwrite the status fields the refresh owns.
+    REFRESHED_FIELDS = ["reval_due_date", "days_overdue", "reval_status",
+                        "leie_excluded", "optout_ending", "name", "specialty", "state"]
+    by_email = {r["email"].strip().lower(): r for r in refreshed if r.get("email")}
+    channel_csvs = [os.path.join(args.out_dir, f) for f in
+                    ("hc_warmup_nongoogle.csv", "hc_warmup_google.csv",
+                     "hc_warmup_week1_verified.csv")]
+    for path in channel_csvs:
+        if not os.path.exists(path):
+            continue
+        with open(path, newline="") as f:
+            rdr = csv.DictReader(f)
+            cols = rdr.fieldnames or []
+            rows_ch = list(rdr)
+        updated = 0
+        for r in rows_ch:
+            m = by_email.get(r.get("email", "").strip().lower())
+            if not m:
+                continue
+            for fld in REFRESHED_FIELDS:
+                if fld in cols and fld in m:
+                    r[fld] = m[fld]
+            updated += 1
+        write_atomic(path, rows_ch, cols)
+        log(f"propagated to {os.path.basename(path)}: {updated}/{len(rows_ch)} rows updated")
+
     return 0