diff --git a/data/hc_campaigns/hc_compliance_bundle.html b/data/hc_campaigns/hc_compliance_bundle.html index df1cd57..66277a1 100644 --- a/data/hc_campaigns/hc_compliance_bundle.html +++ b/data/hc_campaigns/hc_compliance_bundle.html @@ -24,7 +24,7 @@ - +
NPI{{ .Subscriber.Attribs.npi }}
Practice{{ .Subscriber.Attribs.detail }}
Practice{{ .Subscriber.Attribs.practice }}
Our service fee$899/yr
diff --git a/data/hc_campaigns/hc_npi_reactivation.html b/data/hc_campaigns/hc_npi_reactivation.html index bd069ce..13f93c6 100644 --- a/data/hc_campaigns/hc_npi_reactivation.html +++ b/data/hc_campaigns/hc_npi_reactivation.html @@ -24,7 +24,7 @@ - +
NPI{{ .Subscriber.Attribs.npi }}
Status{{ .Subscriber.Attribs.detail }}
Practice{{ .Subscriber.Attribs.practice }}
Our service fee$449
diff --git a/data/hc_campaigns/hc_nppes_outdated.html b/data/hc_campaigns/hc_nppes_outdated.html index 49621b0..001a1c7 100644 --- a/data/hc_campaigns/hc_nppes_outdated.html +++ b/data/hc_campaigns/hc_nppes_outdated.html @@ -24,7 +24,7 @@ - +
NPI{{ .Subscriber.Attribs.npi }}
Record{{ .Subscriber.Attribs.detail }}
Practice{{ .Subscriber.Attribs.practice }}
Our service fee$349
diff --git a/data/hc_campaigns/hc_oig_screening.html b/data/hc_campaigns/hc_oig_screening.html index 9b26de4..a2694cf 100644 --- a/data/hc_campaigns/hc_oig_screening.html +++ b/data/hc_campaigns/hc_oig_screening.html @@ -24,7 +24,7 @@ - +
NPI{{ .Subscriber.Attribs.npi }}
Practice{{ .Subscriber.Attribs.detail }}
Practice{{ .Subscriber.Attribs.practice }}
Our service fee$299
diff --git a/scripts/build_healthcare_campaigns_cron.py b/scripts/build_healthcare_campaigns_cron.py index 490dc0f..39bab9d 100644 --- a/scripts/build_healthcare_campaigns_cron.py +++ b/scripts/build_healthcare_campaigns_cron.py @@ -286,8 +286,8 @@ def main(): print(f"[hc-cron] verified_total={len(rows)}") # Split the daily slice across segments. Revalidation (the lead, richest - # data) gets the largest share; the rest share the remainder evenly so every - # program collects engagement data while warming. + # data) gets ~half; the rest share the remainder evenly. The lead reclaims + # any rounding remainder so the total never exceeds the warming-rate budget. lead = "revalidation_overdue" others = [s for s in segments if s != lead] per_seg = {} @@ -296,10 +296,17 @@ def main(): rem = total_slice - per_seg[lead] else: rem = total_slice - if others: - each = max(1, rem // len(others)) + if others and rem > 0: + base, extra = divmod(rem, len(others)) + for i, s in enumerate(others): + per_seg[s] = base + (1 if i < extra else 0) + elif others: for s in others: - per_seg[s] = each + per_seg[s] = 0 + # Reclaim any rounding remainder onto the lead so sum(per_seg) == total_slice + # exactly (never overshoot the rate cap, never silently drop budget). + if lead in per_seg: + per_seg[lead] += total_slice - sum(per_seg.values()) grand = 0 for seg_key in segments: diff --git a/scripts/hc_data_refresh.py b/scripts/hc_data_refresh.py index 17150eb..1407e39 100644 --- a/scripts/hc_data_refresh.py +++ b/scripts/hc_data_refresh.py @@ -81,6 +81,9 @@ def cms_revalidation_for(npis: list[str]) -> dict[str, dict]: today = datetime.date.today() out: dict[str, dict] = {} for i, npi in enumerate(npis, 1): + if i % 100 == 0: + log(f" cms: checked {i}/{len(npis)} NPIs") + time.sleep(0.05) # be polite to data.cms.gov (every request, not just hits) q = urllib.parse.urlencode({"filter[National Provider Identifier]": npi, "size": 1}) try: rows = http_json(f"{CMS_API}?{q}", timeout=20) @@ -102,9 +105,6 @@ def cms_revalidation_for(npis: list[str]) -> dict[str, dict]: dd_disp = d.strftime("%m/%d/%Y") overdue = (today - d).days out[npi] = {"due_date": dd_disp, "days_overdue": overdue, "overdue": overdue > 0, "on_list": True} - if i % 100 == 0: - log(f" cms: checked {i}/{len(npis)} NPIs") - time.sleep(0.05) # be polite to data.cms.gov return out @@ -242,6 +242,10 @@ def main() -> int: refreshed = [] for r in rows: npi = r["npi"].strip() + if not npi: + # No NPI to re-check; leave the row's existing status untouched. + refreshed.append(r) + continue rv = reval.get(npi) if rv is not None and not args.skip_cms: r["reval_due_date"] = rv["due_date"] @@ -250,30 +254,40 @@ def main() -> int: else ("upcoming" if rv["due_date"] else "on_list_tbd")) elif not args.skip_cms: # No longer on the revalidation list -> they've revalidated / dropped. - r["reval_status"] = "not_on_list" + # Use the same vocabulary the original list builder emits. + r["reval_status"] = "no_reval_flag" + r["reval_due_date"] = "" r["days_overdue"] = "" - if not args.skip_oig or not args.skip_sam: - r["leie_excluded"] = "1" if npi in excluded else "" + # Only rewrite the exclusion flag when OIG was actually pulled, so a + # --skip-oig run never blanks existing flags. SAM is supplemental. + if not args.skip_oig: + r["leie_excluded"] = "Y" if npi in excluded else "" refreshed.append(r) n_overdue = sum(1 for r in refreshed if r.get("reval_status") == "overdue") n_upcoming = sum(1 for r in refreshed if r.get("reval_status") == "upcoming") - n_excluded = sum(1 for r in refreshed if r.get("leie_excluded") == "1") + n_excluded = sum(1 for r in refreshed if (r.get("leie_excluded") or "").strip() not in ("", "0", "false")) log(f"refreshed: overdue={n_overdue} upcoming={n_upcoming} excluded={n_excluded}") if args.dry_run: log("dry-run, no files written") return 0 - write_atomic(args.master, refreshed, HEADER) + # Preserve any columns the master already had beyond HEADER (so we never + # silently drop data on write). HEADER first to keep the canonical order. + master_cols = list(dict.fromkeys(HEADER + [k for r in refreshed for k in r])) + write_atomic(args.master, refreshed, master_cols) log(f"wrote {args.master} ({len(refreshed)} rows)") # Propagate the fresh status fields into the channel CSVs the campaign cron # actually reads. These are email-keyed subsets of the master with extra # deliverability columns (verify_ok/verify_reason) we must preserve; we only # overwrite the status fields the refresh owns. + # The refresh OWNS these status fields; it must not touch others (notably + # optout_ending, which only the original list builder computes -- including + # it here would blank it and starve the compliance_bundle segment). REFRESHED_FIELDS = ["reval_due_date", "days_overdue", "reval_status", - "leie_excluded", "optout_ending", "name", "specialty", "state"] + "leie_excluded", "name", "specialty", "state"] by_email = {r["email"].strip().lower(): r for r in refreshed if r.get("email")} channel_csvs = [os.path.join(args.out_dir, f) for f in ("hc_warmup_nongoogle.csv", "hc_warmup_google.csv",