#!/usr/bin/env python3
"""Enrich the verified NPPES institutional email list with CMS revalidation status.

The institutional list (npi,email,...) is email-rich but has no revalidation
data, so only the OIG-screening segment can run on it. This joins it to the CMS
"Revalidation Due Date List" bulk file (revalidation_base.csv, keyed by NPI) so
the revalidation_overdue / revalidation_due_soon / npi_reactivation segments can
ALSO run on the full 63k -- unlocking the flagship $599 revalidation pitch
against the whole institutional pool instead of the 826-row warmup master.

Output adds the columns the campaign cron's selectors need:
    reval_due_date, days_overdue, reval_status (overdue|upcoming|not_on_list)

Usage:
    python3 scripts/enrich_institutional_revalidation.py \
        INSTITUTIONAL.csv REVALIDATION_BASE.csv OUT.csv
"""
from __future__ import annotations

import csv
import os
import sys
from datetime import date, datetime

csv.field_size_limit(10_000_000)

COL_NPI = "National Provider Identifier"
COL_DUE = "Revalidation Due Date"
COL_ADJ = "Adjusted Due Date"

# Columns appended to every institutional row, in output order.
_OUT_COLS = ("reval_due_date", "days_overdue", "reval_status")

_USAGE = (
    "usage: enrich_institutional_revalidation.py "
    "INSTITUTIONAL.csv REVALIDATION_BASE.csv OUT.csv"
)


def parse_due(s: str) -> date | str | None:
    """Parse one due-date cell from the revalidation list.

    Returns:
        * ``None`` when the cell is empty/whitespace or is not a recognised date,
        * the literal string ``"TBD"`` when the cell reads TBD (any case),
        * a ``datetime.date`` when the cell matches ``MM/DD/YYYY`` or
          ``YYYY-MM-DD``.
    """
    s = (s or "").strip()
    if not s:
        return None
    if s.upper() == "TBD":
        return "TBD"
    for fmt in ("%m/%d/%Y", "%Y-%m-%d"):
        try:
            return datetime.strptime(s, fmt).date()
        except ValueError:
            continue
    return None


def _load_due_dates(path: str) -> dict[str, date | str]:
    """Build the NPI -> due date (or "TBD") map from the revalidation CSV.

    The adjusted due date is preferred; the plain due date is used only when
    the adjusted cell is empty or unparseable (a "TBD" adjusted cell counts as
    a value and is kept). Rows whose NPI is blank or non-numeric are skipped.
    An NPI that appears on several rows keeps the value of the last row that
    yields one.

    Raises:
        ValueError: the file has no NPI column, so nothing could ever match.
    """
    due_by_npi: dict[str, date | str] = {}
    with open(path, newline="", encoding="latin-1") as f:
        reader = csv.DictReader(f)
        if COL_NPI not in (reader.fieldnames or ()):
            raise ValueError(f"{path}: missing required column {COL_NPI!r}")
        for row in reader:
            npi = (row.get(COL_NPI) or "").strip()
            if not npi.isdigit():  # also rejects the empty string
                continue
            due = parse_due(row.get(COL_ADJ) or "") or parse_due(row.get(COL_DUE) or "")
            if due is not None:
                due_by_npi[npi] = due
    return due_by_npi


def _classify(due: date | str | None, today: date) -> tuple[str, str, str]:
    """Return (reval_due_date, days_overdue, reval_status) for one lookup result."""
    if due is None:
        return "", "", "not_on_list"
    if isinstance(due, str):  # the "TBD" marker: on the list, no date yet
        return "", "0", "upcoming"
    days = (today - due).days
    # A due date of today (0 days) is still "upcoming"; negative = days remaining.
    status = "overdue" if days > 0 else "upcoming"
    return due.strftime("%m/%d/%Y"), str(days), status


def main(argv: list[str] | None = None) -> int:
    """Join the institutional list to the revalidation list and write OUT.csv.

    Args:
        argv: ``[INSTITUTIONAL.csv, REVALIDATION_BASE.csv, OUT.csv]``; defaults
            to ``sys.argv[1:]``. Arguments beyond the third are ignored.

    Returns:
        Process exit code: 0 on success, 2 on a usage error, 1 when an input
        file lacks a required column or OUT would overwrite an input.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        print(_USAGE, file=sys.stderr)
        return 2
    inst_f, reval_f, out_f = args[:3]

    # Opening OUT for writing truncates it, which would destroy an input file.
    out_real = os.path.realpath(out_f)
    if out_real in (os.path.realpath(inst_f), os.path.realpath(reval_f)):
        print(f"error: output path {out_f} is also an input file", file=sys.stderr)
        return 1

    today = date.today()

    # 1) NPI -> due date from the CMS revalidation list (last row wins).
    try:
        due_by_npi = _load_due_dates(reval_f)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
    print(f"revalidation list: {len(due_by_npi):,} NPIs with a due date/TBD",
          file=sys.stderr)

    # 2) enrich the institutional rows.
    overdue = upcoming = total = malformed = 0
    # utf-8-sig strips a leading BOM (and reads plain UTF-8 unchanged); with
    # plain utf-8 a BOM would turn the first header into "\ufeffnpi" and every
    # row would silently come out as not_on_list.
    with open(inst_f, newline="", encoding="utf-8-sig") as fin:
        reader = csv.DictReader(fin)
        in_fields = list(reader.fieldnames or [])
        if "npi" not in in_fields:
            print(f"error: {inst_f}: missing required column 'npi'", file=sys.stderr)
            return 1
        # Re-running on an already-enriched file must not duplicate columns.
        fields = in_fields + [c for c in _OUT_COLS if c not in in_fields]

        with open(out_f, "w", newline="", encoding="utf-8") as fout:
            writer = csv.DictWriter(fout, fieldnames=fields)
            writer.writeheader()
            for row in reader:
                total += 1
                # DictReader stores surplus cells under the key None, which
                # DictWriter rejects; drop them instead of aborting mid-file.
                if row.pop(None, None):
                    malformed += 1
                npi = (row.get("npi") or "").strip()
                due_text, days_text, status = _classify(due_by_npi.get(npi), today)
                row["reval_due_date"] = due_text
                row["days_overdue"] = days_text
                row["reval_status"] = status
                if status == "overdue":
                    overdue += 1
                elif status == "upcoming":
                    upcoming += 1
                writer.writerow(row)

    on_list = overdue + upcoming
    print(f"institutional rows: {total:,}", file=sys.stderr)
    print(f" on revalidation list: {on_list:,} "
          f"(overdue={overdue:,}, upcoming={upcoming:,})", file=sys.stderr)
    if malformed:
        print(f"warning: {malformed:,} rows had more cells than the header; "
              f"extra cells were dropped", file=sys.stderr)
    print(f" -> {out_f}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())