fix(npi): lenient CSV decoding in companion loader (CMS exports have stray latin-1 bytes)
This commit is contained in:
parent
157c7a2571
commit
e32193352b
1 changed file with 8 additions and 3 deletions
|
|
@ -27,6 +27,11 @@ from psycopg2.extras import execute_values
|
|||
|
||||
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://pw:pw@localhost:5432/performancewest")
|
||||
|
||||
# CMS/OIG exports are not always clean UTF-8 (stray latin-1 bytes like 0xa0).
|
||||
# Decode leniently so a few bad bytes don't abort a multi-hundred-MB load.
|
||||
def open_csv(path):
    """Open *path* as a text file suitable for ``csv`` readers, decoding leniently.

    The file is decoded as ``utf-8-sig`` (so a leading byte-order mark, if
    present, is dropped) with ``errors="replace"``: bytes that are not valid
    UTF-8 become U+FFFD instead of raising ``UnicodeDecodeError``.
    ``newline=""`` leaves line-ending handling to the ``csv`` module.

    Returns the open file object; the caller is responsible for closing it,
    typically by using the result in a ``with`` statement.
    """
    handle = open(
        path,
        encoding="utf-8-sig",
        errors="replace",
        newline="",
    )
    return handle
|
||||
|
||||
|
||||
def parse_date(s):
|
||||
if not s:
|
||||
|
|
@ -49,7 +54,7 @@ def clean_npi(s):
|
|||
|
||||
def load_revalidation(conn, path):
|
||||
rows = []
|
||||
with open(path, newline="", encoding="utf-8-sig") as f:
|
||||
with open_csv(path) as f:
|
||||
for r in csv.DictReader(f):
|
||||
npi = (r.get("National Provider Identifier") or "").strip()
|
||||
if not (npi.isdigit() and len(npi) == 10):
|
||||
|
|
@ -84,7 +89,7 @@ def load_revalidation(conn, path):
|
|||
|
||||
def load_exclusions(conn, path):
|
||||
rows = []
|
||||
with open(path, newline="", encoding="utf-8-sig") as f:
|
||||
with open_csv(path) as f:
|
||||
for r in csv.DictReader(f):
|
||||
rows.append((
|
||||
clean_npi(r.get("NPI")),
|
||||
|
|
@ -114,7 +119,7 @@ def load_exclusions(conn, path):
|
|||
|
||||
def load_optout(conn, path):
|
||||
rows = []
|
||||
with open(path, newline="", encoding="utf-8-sig") as f:
|
||||
with open_csv(path) as f:
|
||||
for r in csv.DictReader(f):
|
||||
npi = (r.get("npi") or r.get("NPI") or "").strip()
|
||||
if not (npi.isdigit() and len(npi) == 10):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue