hc: unlock the full 62k verified institutional pool for broad offers

The OIG-screening + NPPES-update segments were effectively limited to ~1,437
providers because the warmup 'any' selector excluded not-on-reval-list rows as a
deliverability proxy -- but that excludes almost the ENTIRE institutional list
(org NPIs aren't individual Medicare enrollees). Since we already SMTP-verified
all 63k inboxes, add an 'institutional_verified' selector that trusts our own
verification instead of reval-list presence. Result: OIG + NPPES-update now
address 62,422 (43x more), giving multiple broad offers to test engagement on.

- enrich_institutional_revalidation.py: fast local join of the institutional
  list to the CMS Revalidation Due Date List bulk file (revalidation_base.csv)
  by NPI -> adds reval_due_date/days_overdue/reval_status. ~1,437 are genuine
  Medicare enrollees (197 overdue / 164 due-soon) -> flagship $599 reval pitch.
- npi_reactivation stays on leie_or_deactivated (only REAL deactivations -- no
  false 'your NPI is deactivated' claims to active orgs).
This commit is contained in:
justin 2026-06-14 01:07:40 -05:00
parent 792f5e948f
commit b73edadb89
3 changed files with 117 additions and 2 deletions

View file

@ -84,7 +84,7 @@ SEGMENTS = {
"price": "$349", "price": "$349",
"list_name": "HC Warmup - NPPES Update", "list_name": "HC Warmup - NPPES Update",
"campaign_name": "HC Warmup - NPPES Outdated", "campaign_name": "HC Warmup - NPPES Outdated",
"selector": "reval_upcoming", "selector": "institutional_verified",
}, },
"oig_screening": { "oig_screening": {
"subject": "Are you screening for OIG / SAM exclusions?", "subject": "Are you screening for OIG / SAM exclusions?",
@ -93,7 +93,7 @@ SEGMENTS = {
"price": "$299", "price": "$299",
"list_name": "HC Warmup - OIG Screening", "list_name": "HC Warmup - OIG Screening",
"campaign_name": "HC Warmup - OIG Screening", "campaign_name": "HC Warmup - OIG Screening",
"selector": "any", "selector": "institutional_verified",
}, },
"compliance_bundle": { "compliance_bundle": {
"subject": "Get your provider compliance handled for the year", "subject": "Get your provider compliance handled for the year",

View file

@ -338,6 +338,19 @@ def row_matches(seg_key: str, r: dict) -> bool:
if status == "overdue" and (od is None or od > WARMUP_OVERDUE_MAX): if status == "overdue" and (od is None or od > WARMUP_OVERDUE_MAX):
return False return False
return True return True
if sel == "institutional_verified":
# For the freshly SMTP-VERIFIED institutional list, "not on the CMS
# revalidation list" does NOT mean undeliverable -- it just means the org
# NPI is not an individual Medicare enrollee. We already proved the inbox
# is live (verify_ok), so trust that instead of using reval-list presence
# as a deliverability proxy. Still hold out the heavily-overdue (stale)
# individual enrollees that DO appear, to protect the warming IP.
if (str(r.get("verify_ok", "")).strip().upper() not in ("Y", "YES", "TRUE", "1", "")):
return False
od = _overdue_days(r)
if status == "overdue" and od is not None and od > WARMUP_OVERDUE_MAX:
return False
return True
return False return False

View file

@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""Enrich the verified NPPES institutional email list with CMS revalidation status.

The institutional list (npi,email,...) is email-rich but has no revalidation
data, so only the OIG-screening segment can run on it. This joins it to the CMS
"Revalidation Due Date List" bulk file (revalidation_base.csv, keyed by NPI) so
the revalidation_overdue / revalidation_due_soon / npi_reactivation segments can
ALSO run on the full 63k -- unlocking the flagship $599 revalidation pitch
against the whole institutional pool instead of the 826-row warmup master.

Output adds the columns the campaign cron's selectors need:
    reval_due_date, days_overdue, reval_status (overdue|upcoming|not_on_list)

Usage:
    python3 scripts/enrich_institutional_revalidation.py \
        INSTITUTIONAL.csv REVALIDATION_BASE.csv OUT.csv
"""
from __future__ import annotations
import csv
import sys
from datetime import date, datetime
# Raise the csv module's per-field size cap to 10,000,000 characters so a very
# long cell in either input file does not abort the read with a csv.Error.
csv.field_size_limit(10_000_000)
# Header names looked up in each row of the revalidation CSV. The "Adjusted"
# column is consulted first, falling back to the plain due-date column.
COL_NPI = "National Provider Identifier"
COL_DUE = "Revalidation Due Date"
COL_ADJ = "Adjusted Due Date"
def parse_due(s: str):
    """Interpret one due-date cell from the revalidation file.

    Args:
        s: Raw cell text; ``None`` and surrounding whitespace are tolerated.

    Returns:
        The string ``"TBD"`` when the cell reads TBD (any letter case), a
        ``datetime.date`` when the cell parses as ``MM/DD/YYYY`` or
        ``YYYY-MM-DD``, and ``None`` for blank or unrecognised text.
    """
    text = (s or "").strip()
    if not text:
        return None
    if text.upper() == "TBD":
        return "TBD"
    # Try each accepted layout in order; the first that parses wins.
    for layout in ("%m/%d/%Y", "%Y-%m-%d"):
        try:
            parsed = datetime.strptime(text, layout)
        except ValueError:
            pass
        else:
            return parsed.date()
    return None
# Columns this script adds to every institutional row, in output order.
_ENRICH_COLUMNS = ("reval_due_date", "days_overdue", "reval_status")


def _load_due_dates(reval_path: str) -> dict[str, object]:
    """Build an NPI -> due-date map (a ``date`` or the string "TBD") from the revalidation CSV."""
    due_by_npi: dict[str, object] = {}
    with open(reval_path, newline="", encoding="latin-1") as f:
        for row in csv.DictReader(f):
            npi = (row.get(COL_NPI) or "").strip()
            if not npi.isdigit():  # also rejects the empty string
                continue
            # Prefer the adjusted due date; fall back to the plain due date.
            d = parse_due(row.get(COL_ADJ) or "") or parse_due(row.get(COL_DUE) or "")
            if d is not None:
                # A later row for the same NPI replaces an earlier one.
                due_by_npi[npi] = d
    return due_by_npi


def _reval_columns(due: object, today: date) -> tuple[str, str, str]:
    """Return (reval_due_date, days_overdue, reval_status) for one looked-up due value."""
    if due is None:
        return "", "", "not_on_list"
    if due == "TBD":
        # On the list but with no date assigned yet: treated as upcoming, 0 days overdue.
        return "", "0", "upcoming"
    od = (today - due).days
    return due.strftime("%m/%d/%Y"), str(od), ("overdue" if od > 0 else "upcoming")


def main() -> int:
    """Join the institutional CSV to the revalidation CSV by NPI and write the result.

    Takes three positional command-line arguments from ``sys.argv``: the
    institutional CSV (needs an ``npi`` column), the revalidation CSV, and the
    output path; arguments beyond the third are ignored. Every institutional
    row is copied to the output with ``reval_due_date``, ``days_overdue`` and
    ``reval_status`` filled in. Progress counts go to stderr.

    Returns:
        0 on success; 1 when the institutional file has no ``npi`` header
        (which includes an empty file); 2 when fewer than three paths are given.
    """
    paths = sys.argv[1:4]
    if len(paths) < 3:
        print("usage: enrich_institutional_revalidation.py "
              "INSTITUTIONAL.csv REVALIDATION_BASE.csv OUT.csv", file=sys.stderr)
        return 2
    inst_f, reval_f, out_f = paths
    today = date.today()

    # 1) NPI -> due date from the CMS revalidation list (last row per NPI wins).
    due_by_npi = _load_due_dates(reval_f)
    print(f"revalidation list: {len(due_by_npi):,} NPIs with a due date/TBD", file=sys.stderr)

    # 2) enrich the institutional rows.
    on_list = overdue = upcoming = total = 0
    # utf-8-sig strips a leading BOM if present, so the first header is read as
    # "npi" rather than "\ufeffnpi"; files without a BOM decode unchanged.
    with open(inst_f, newline="", encoding="utf-8-sig") as fin:
        reader = csv.DictReader(fin)
        in_fields = list(reader.fieldnames or [])  # fieldnames is None for an empty file
        if "npi" not in in_fields:
            # Without the join key every row would silently come out "not_on_list".
            print(f"error: {inst_f} has no 'npi' column; nothing to join on", file=sys.stderr)
            return 1
        # Append only the columns not already present, so re-running on an
        # already-enriched file refreshes them instead of duplicating headers.
        fields = in_fields + [c for c in _ENRICH_COLUMNS if c not in in_fields]
        with open(out_f, "w", newline="", encoding="utf-8") as fout:
            w = csv.DictWriter(fout, fieldnames=fields)
            w.writeheader()
            for row in reader:
                total += 1
                due = due_by_npi.get((row.get("npi") or "").strip())
                due_text, days_text, status = _reval_columns(due, today)
                row["reval_due_date"] = due_text
                row["days_overdue"] = days_text
                row["reval_status"] = status
                if due is not None:
                    on_list += 1
                    if status == "overdue":
                        overdue += 1
                    else:
                        upcoming += 1
                w.writerow(row)

    print(f"institutional rows: {total:,}", file=sys.stderr)
    print(f"  on revalidation list: {on_list:,} "
          f"(overdue={overdue:,}, upcoming={upcoming:,})", file=sys.stderr)
    print(f"  -> {out_f}", file=sys.stderr)
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    sys.exit(main())