From b73edadb89c7522996ed8631930000be421b20d6 Mon Sep 17 00:00:00 2001
From: justin
Date: Sun, 14 Jun 2026 01:07:40 -0500
Subject: [PATCH] hc: unlock the full 62k verified institutional pool for broad offers

The OIG-screening + NPPES-update segments were effectively limited to ~1,437
providers because their warmup selectors ('any' and 'reval_upcoming') excluded
not-on-reval-list rows as a deliverability proxy -- but that excludes almost the
ENTIRE institutional list (org NPIs aren't individual Medicare enrollees).
Since we already SMTP-verified all 63k inboxes, add an 'institutional_verified'
selector that trusts our own verification instead of reval-list presence.

Result: OIG + NPPES-update now address 62,422 providers (43x more), giving
multiple broad offers to test engagement on.

- enrich_institutional_revalidation.py: fast local join of the institutional
  list to the CMS Revalidation Due Date List bulk file (revalidation_base.csv)
  by NPI -> adds reval_due_date/days_overdue/reval_status. ~1,437 are genuine
  Medicare enrollees (197 overdue / 164 due-soon) -> flagship $599 reval pitch.
- npi_reactivation stays on leie_or_deactivated (only REAL deactivations -- no
  false 'your NPI is deactivated' claims to active orgs).
--- scripts/build_healthcare_campaigns.py | 4 +- scripts/build_healthcare_campaigns_cron.py | 13 +++ scripts/enrich_institutional_revalidation.py | 102 +++++++++++++++++++ 3 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 scripts/enrich_institutional_revalidation.py diff --git a/scripts/build_healthcare_campaigns.py b/scripts/build_healthcare_campaigns.py index 628727a..bb89d24 100644 --- a/scripts/build_healthcare_campaigns.py +++ b/scripts/build_healthcare_campaigns.py @@ -84,7 +84,7 @@ SEGMENTS = { "price": "$349", "list_name": "HC Warmup - NPPES Update", "campaign_name": "HC Warmup - NPPES Outdated", - "selector": "reval_upcoming", + "selector": "institutional_verified", }, "oig_screening": { "subject": "Are you screening for OIG / SAM exclusions?", @@ -93,7 +93,7 @@ SEGMENTS = { "price": "$299", "list_name": "HC Warmup - OIG Screening", "campaign_name": "HC Warmup - OIG Screening", - "selector": "any", + "selector": "institutional_verified", }, "compliance_bundle": { "subject": "Get your provider compliance handled for the year", diff --git a/scripts/build_healthcare_campaigns_cron.py b/scripts/build_healthcare_campaigns_cron.py index 13b209c..3e716c4 100644 --- a/scripts/build_healthcare_campaigns_cron.py +++ b/scripts/build_healthcare_campaigns_cron.py @@ -338,6 +338,19 @@ def row_matches(seg_key: str, r: dict) -> bool: if status == "overdue" and (od is None or od > WARMUP_OVERDUE_MAX): return False return True + if sel == "institutional_verified": + # For the freshly SMTP-VERIFIED institutional list, "not on the CMS + # revalidation list" does NOT mean undeliverable -- it just means the org + # NPI is not an individual Medicare enrollee. We already proved the inbox + # is live (verify_ok), so trust that instead of using reval-list presence + # as a deliverability proxy. Still hold out the heavily-overdue (stale) + # individual enrollees that DO appear, to protect the warming IP. 
+        if (str(r.get("verify_ok", "")).strip().upper() not in ("Y", "YES", "TRUE", "1", "")):
+            return False
+        od = _overdue_days(r)
+        if status == "overdue" and od is not None and od > WARMUP_OVERDUE_MAX:
+            return False
+        return True
     return False
 
 
diff --git a/scripts/enrich_institutional_revalidation.py b/scripts/enrich_institutional_revalidation.py
new file mode 100644
index 0000000..f279e6e
--- /dev/null
+++ b/scripts/enrich_institutional_revalidation.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""Enrich the verified NPPES institutional email list with CMS revalidation status.
+
+The institutional list (npi,email,...) is email-rich but has no revalidation
+data. This joins it by NPI to the CMS "Revalidation Due Date List" bulk file
+(revalidation_base.csv) and tags every row with its revalidation status, so the
+campaign cron's revalidation selectors can pick out the rows that really are on
+the CMS list. Rows whose NPI is absent from the CMS file are kept and tagged
+not_on_list; nothing is dropped.
+
+Output keeps every input column and adds:
+  reval_due_date  MM/DD/YYYY; empty when the date is TBD or the NPI is not listed
+  days_overdue    today minus due date in days (<= 0 means not yet due);
+                  "0" when TBD, empty when the NPI is not listed
+  reval_status    overdue | upcoming | not_on_list
+
+Usage:
+  python3 scripts/enrich_institutional_revalidation.py \
+      INSTITUTIONAL.csv REVALIDATION_BASE.csv OUT.csv
+"""
+from __future__ import annotations
+import csv
+import sys
+from datetime import date, datetime
+
+csv.field_size_limit(10_000_000)
+
+COL_NPI = "National Provider Identifier"
+COL_DUE = "Revalidation Due Date"
+COL_ADJ = "Adjusted Due Date"
+COL_INST_NPI = "npi"
+OUT_COLS = ("reval_due_date", "days_overdue", "reval_status")
+TBD = "TBD"
+
+
+def parse_due(s: str) -> date | str | None:
+    """Parse one due-date cell from the CMS revalidation file.
+
+    Returns a date for MM/DD/YYYY or YYYY-MM-DD text, the string "TBD" when the
+    cell says TBD (any case), and None for empty or unparseable text.
+    """
+    s = (s or "").strip()
+    if not s:
+        return None
+    if s.upper() == TBD:
+        return TBD
+    for fmt in ("%m/%d/%Y", "%Y-%m-%d"):
+        try:
+            return datetime.strptime(s, fmt).date()
+        except ValueError:
+            continue
+    return None
+
+
+def _prefer_due(old, new):
+    """Choose the due value to keep when one NPI appears on several rows.
+
+    A concrete date beats TBD and the earliest concrete date wins, so the
+    result does not depend on the row order of the CMS file.
+    """
+    if old is None or old == TBD:
+        return new
+    if new == TBD:
+        return old
+    return min(old, new)
+
+
+def _load_due_dates(path: str) -> dict[str, object]:
+    """Return NPI -> due value (a date or "TBD") read from the CMS file.
+
+    Raises ValueError when the NPI column is missing from the header.
+    """
+    due_by_npi: dict[str, object] = {}
+    with open(path, newline="", encoding="latin-1") as f:
+        reader = csv.DictReader(f)
+        if COL_NPI not in (reader.fieldnames or []):
+            raise ValueError(f"{path}: missing column {COL_NPI!r}")
+        for row in reader:
+            npi = (row.get(COL_NPI) or "").strip()
+            if not npi.isdigit():
+                continue
+            # An adjusted value (including TBD) supersedes the original date.
+            due = parse_due(row.get(COL_ADJ) or "") or parse_due(row.get(COL_DUE) or "")
+            if due is not None:
+                due_by_npi[npi] = _prefer_due(due_by_npi.get(npi), due)
+    return due_by_npi
+
+
+def _status_columns(due, today: date) -> dict[str, str]:
+    """Return the three output cells for a row whose NPI maps to *due*."""
+    if due is None:
+        return {"reval_due_date": "", "days_overdue": "", "reval_status": "not_on_list"}
+    if due == TBD:
+        # Listed but no date assigned yet: report as upcoming with 0 days.
+        return {"reval_due_date": "", "days_overdue": "0", "reval_status": "upcoming"}
+    days = (today - due).days
+    return {
+        "reval_due_date": due.strftime("%m/%d/%Y"),
+        "days_overdue": str(days),
+        "reval_status": "overdue" if days > 0 else "upcoming",
+    }
+
+
+def main() -> int:
+    """Join the institutional CSV to the CMS revalidation CSV and write the result.
+
+    Paths come from sys.argv. Returns 0 on success, 2 on bad usage and 1 when a
+    required column is missing from either input.
+    """
+    if len(sys.argv) < 4:
+        print(f"usage: {sys.argv[0]} INSTITUTIONAL.csv REVALIDATION_BASE.csv OUT.csv",
+              file=sys.stderr)
+        return 2
+    inst_f, reval_f, out_f = sys.argv[1:4]
+    today = date.today()
+
+    # 1) NPI -> due date from the CMS revalidation list.
+    try:
+        due_by_npi = _load_due_dates(reval_f)
+    except ValueError as err:
+        print(f"error: {err}", file=sys.stderr)
+        return 1
+    print(f"revalidation list: {len(due_by_npi):,} NPIs with a due date/TBD", file=sys.stderr)
+
+    # 2) enrich the institutional rows.
+    counts = {"overdue": 0, "upcoming": 0, "not_on_list": 0}
+    total = 0
+    # utf-8-sig drops a leading BOM, which would otherwise become part of the
+    # first header name and make every "npi" lookup miss.
+    with open(inst_f, newline="", encoding="utf-8-sig") as fin:
+        reader = csv.DictReader(fin)
+        header = list(reader.fieldnames or [])
+        if COL_INST_NPI not in header:
+            print(f"error: {inst_f}: missing column {COL_INST_NPI!r}", file=sys.stderr)
+            return 1
+        # Do not duplicate the columns when the input was already enriched.
+        fields = header + [c for c in OUT_COLS if c not in header]
+        # Open the output only after validation so a bad input cannot truncate it.
+        with open(out_f, "w", newline="", encoding="utf-8") as fout:
+            w = csv.DictWriter(fout, fieldnames=fields)
+            w.writeheader()
+            for row in reader:
+                total += 1
+                npi = (row.get(COL_INST_NPI) or "").strip()
+                cols = _status_columns(due_by_npi.get(npi), today)
+                row.update(cols)
+                counts[cols["reval_status"]] += 1
+                w.writerow(row)
+
+    on_list = counts["overdue"] + counts["upcoming"]
+    print(f"institutional rows: {total:,}", file=sys.stderr)
+    print(f" on revalidation list: {on_list:,} "
+          f"(overdue={counts['overdue']:,}, upcoming={counts['upcoming']:,})", file=sys.stderr)
+    print(f" -> {out_f}", file=sys.stderr)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())