hc refresh: propagate fresh status into the channel CSVs the cron reads

The channel CSVs (hc_warmup_nongoogle.csv, hc_warmup_google.csv,
hc_warmup_week1_verified.csv) are email-keyed subsets of the master with extra
deliverability columns (verify_ok/verify_reason).
The refresh now writes the fresh status fields (reval_due_date, days_overdue,
reval_status, leie_excluded, optout_ending, name, specialty, state) back into each
channel CSV that exists, updating only the columns that file already has and
preserving the extra columns and row membership, so a single weekly run updates
everything the campaign cron consumes -- not just the master.
This commit is contained in:
justin 2026-06-08 03:13:00 -05:00
parent 4f455475c0
commit 85dc3d5c3b

View file

@@ -268,9 +268,35 @@ def main() -> int:
write_atomic(args.master, refreshed, HEADER) write_atomic(args.master, refreshed, HEADER)
log(f"wrote {args.master} ({len(refreshed)} rows)") log(f"wrote {args.master} ({len(refreshed)} rows)")
# Re-derive the channel CSVs the campaign cron reads (Google vs non-Google # Propagate the fresh status fields into the channel CSVs the campaign cron
# split is a deliverability concern, not a segment one; keep the existing # actually reads. These are email-keyed subsets of the master with extra
# split if those files exist so we don't lose warmup-cohort separation). # deliverability columns (verify_ok/verify_reason) we must preserve; we only
# overwrite the status fields the refresh owns.
REFRESHED_FIELDS = ["reval_due_date", "days_overdue", "reval_status",
"leie_excluded", "optout_ending", "name", "specialty", "state"]
by_email = {r["email"].strip().lower(): r for r in refreshed if r.get("email")}
channel_csvs = [os.path.join(args.out_dir, f) for f in
("hc_warmup_nongoogle.csv", "hc_warmup_google.csv",
"hc_warmup_week1_verified.csv")]
for path in channel_csvs:
if not os.path.exists(path):
continue
with open(path, newline="") as f:
rdr = csv.DictReader(f)
cols = rdr.fieldnames or []
rows_ch = list(rdr)
updated = 0
for r in rows_ch:
m = by_email.get(r.get("email", "").strip().lower())
if not m:
continue
for fld in REFRESHED_FIELDS:
if fld in cols and fld in m:
r[fld] = m[fld]
updated += 1
write_atomic(path, rows_ch, cols)
log(f"propagated to {os.path.basename(path)}: {updated}/{len(rows_ch)} rows updated")
return 0 return 0