diff --git a/data/hc_campaigns/hc_compliance_bundle.html b/data/hc_campaigns/hc_compliance_bundle.html
index df1cd57..66277a1 100644
--- a/data/hc_campaigns/hc_compliance_bundle.html
+++ b/data/hc_campaigns/hc_compliance_bundle.html
@@ -24,7 +24,7 @@
| NPI | {{ .Subscriber.Attribs.npi }} |
- | Practice | {{ .Subscriber.Attribs.detail }} |
+ | Practice | {{ .Subscriber.Attribs.practice }} |
| Our service fee | $899/yr |
diff --git a/data/hc_campaigns/hc_npi_reactivation.html b/data/hc_campaigns/hc_npi_reactivation.html
index bd069ce..13f93c6 100644
--- a/data/hc_campaigns/hc_npi_reactivation.html
+++ b/data/hc_campaigns/hc_npi_reactivation.html
@@ -24,7 +24,7 @@
| NPI | {{ .Subscriber.Attribs.npi }} |
- | Status | {{ .Subscriber.Attribs.detail }} |
+ | Practice | {{ .Subscriber.Attribs.practice }} |
| Our service fee | $449 |
diff --git a/data/hc_campaigns/hc_nppes_outdated.html b/data/hc_campaigns/hc_nppes_outdated.html
index 49621b0..001a1c7 100644
--- a/data/hc_campaigns/hc_nppes_outdated.html
+++ b/data/hc_campaigns/hc_nppes_outdated.html
@@ -24,7 +24,7 @@
| NPI | {{ .Subscriber.Attribs.npi }} |
- | Record | {{ .Subscriber.Attribs.detail }} |
+ | Practice | {{ .Subscriber.Attribs.practice }} |
| Our service fee | $349 |
diff --git a/data/hc_campaigns/hc_oig_screening.html b/data/hc_campaigns/hc_oig_screening.html
index 9b26de4..a2694cf 100644
--- a/data/hc_campaigns/hc_oig_screening.html
+++ b/data/hc_campaigns/hc_oig_screening.html
@@ -24,7 +24,7 @@
| NPI | {{ .Subscriber.Attribs.npi }} |
- | Practice | {{ .Subscriber.Attribs.detail }} |
+ | Practice | {{ .Subscriber.Attribs.practice }} |
| Our service fee | $299 |
diff --git a/scripts/build_healthcare_campaigns_cron.py b/scripts/build_healthcare_campaigns_cron.py
index 490dc0f..39bab9d 100644
--- a/scripts/build_healthcare_campaigns_cron.py
+++ b/scripts/build_healthcare_campaigns_cron.py
@@ -286,8 +286,8 @@ def main():
print(f"[hc-cron] verified_total={len(rows)}")
# Split the daily slice across segments. Revalidation (the lead, richest
- # data) gets the largest share; the rest share the remainder evenly so every
- # program collects engagement data while warming.
+ # data) gets ~half; the rest share the remainder evenly. The lead reclaims
+ # any rounding remainder so the total never exceeds the warming-rate budget.
lead = "revalidation_overdue"
others = [s for s in segments if s != lead]
per_seg = {}
@@ -296,10 +296,17 @@ def main():
rem = total_slice - per_seg[lead]
else:
rem = total_slice
- if others:
- each = max(1, rem // len(others))
+ if others and rem > 0:
+ base, extra = divmod(rem, len(others))
+ for i, s in enumerate(others):
+ per_seg[s] = base + (1 if i < extra else 0)
+ elif others:
for s in others:
- per_seg[s] = each
+ per_seg[s] = 0
+ # Reclaim any rounding remainder onto the lead so sum(per_seg) == total_slice
+ # exactly (never overshoot the rate cap, never silently drop budget).
+ if lead in per_seg:
+ per_seg[lead] += total_slice - sum(per_seg.values())
grand = 0
for seg_key in segments:
diff --git a/scripts/hc_data_refresh.py b/scripts/hc_data_refresh.py
index 17150eb..1407e39 100644
--- a/scripts/hc_data_refresh.py
+++ b/scripts/hc_data_refresh.py
@@ -81,6 +81,9 @@ def cms_revalidation_for(npis: list[str]) -> dict[str, dict]:
today = datetime.date.today()
out: dict[str, dict] = {}
for i, npi in enumerate(npis, 1):
+ if i % 100 == 0:
+ log(f" cms: checked {i}/{len(npis)} NPIs")
+ time.sleep(0.05) # be polite to data.cms.gov (every request, not just hits)
q = urllib.parse.urlencode({"filter[National Provider Identifier]": npi, "size": 1})
try:
rows = http_json(f"{CMS_API}?{q}", timeout=20)
@@ -102,9 +105,6 @@ def cms_revalidation_for(npis: list[str]) -> dict[str, dict]:
dd_disp = d.strftime("%m/%d/%Y")
overdue = (today - d).days
out[npi] = {"due_date": dd_disp, "days_overdue": overdue, "overdue": overdue > 0, "on_list": True}
- if i % 100 == 0:
- log(f" cms: checked {i}/{len(npis)} NPIs")
- time.sleep(0.05) # be polite to data.cms.gov
return out
@@ -242,6 +242,10 @@ def main() -> int:
refreshed = []
for r in rows:
npi = r["npi"].strip()
+ if not npi:
+ # No NPI to re-check; leave the row's existing status untouched.
+ refreshed.append(r)
+ continue
rv = reval.get(npi)
if rv is not None and not args.skip_cms:
r["reval_due_date"] = rv["due_date"]
@@ -250,30 +254,40 @@ def main() -> int:
else ("upcoming" if rv["due_date"] else "on_list_tbd"))
elif not args.skip_cms:
# No longer on the revalidation list -> they've revalidated / dropped.
- r["reval_status"] = "not_on_list"
+ # Use the same vocabulary the original list builder emits.
+ r["reval_status"] = "no_reval_flag"
+ r["reval_due_date"] = ""
r["days_overdue"] = ""
- if not args.skip_oig or not args.skip_sam:
- r["leie_excluded"] = "1" if npi in excluded else ""
+ # Only rewrite the exclusion flag when OIG was actually pulled, so a
+ # --skip-oig run never blanks existing flags. SAM is supplemental.
+ if not args.skip_oig:
+ r["leie_excluded"] = "Y" if npi in excluded else ""
refreshed.append(r)
n_overdue = sum(1 for r in refreshed if r.get("reval_status") == "overdue")
n_upcoming = sum(1 for r in refreshed if r.get("reval_status") == "upcoming")
- n_excluded = sum(1 for r in refreshed if r.get("leie_excluded") == "1")
+ n_excluded = sum(1 for r in refreshed if (r.get("leie_excluded") or "").strip() not in ("", "0", "false"))
log(f"refreshed: overdue={n_overdue} upcoming={n_upcoming} excluded={n_excluded}")
if args.dry_run:
log("dry-run, no files written")
return 0
- write_atomic(args.master, refreshed, HEADER)
+ # Preserve any columns the master already had beyond HEADER (so we never
+ # silently drop data on write). HEADER first to keep the canonical order.
+ master_cols = list(dict.fromkeys(HEADER + [k for r in refreshed for k in r]))
+ write_atomic(args.master, refreshed, master_cols)
log(f"wrote {args.master} ({len(refreshed)} rows)")
# Propagate the fresh status fields into the channel CSVs the campaign cron
# actually reads. These are email-keyed subsets of the master with extra
# deliverability columns (verify_ok/verify_reason) we must preserve; we only
# overwrite the status fields the refresh owns.
+ # The refresh OWNS these status fields; it must not touch others (notably
+ # optout_ending, which only the original list builder computes -- including
+ # it here would blank it and starve the compliance_bundle segment).
REFRESHED_FIELDS = ["reval_due_date", "days_overdue", "reval_status",
- "leie_excluded", "optout_ending", "name", "specialty", "state"]
+ "leie_excluded", "name", "specialty", "state"]
by_email = {r["email"].strip().lower(): r for r in refreshed if r.get("email")}
channel_csvs = [os.path.join(args.out_dir, f) for f in
("hc_warmup_nongoogle.csv", "hc_warmup_google.csv",