fix(npi): lenient CSV decoding in companion loader (CMS exports have stray latin-1 bytes)
This commit is contained in:
parent
157c7a2571
commit
e32193352b
1 changed file with 8 additions and 3 deletions
|
|
@ -27,6 +27,11 @@ from psycopg2.extras import execute_values
|
||||||
|
|
||||||
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://pw:pw@localhost:5432/performancewest")
|
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://pw:pw@localhost:5432/performancewest")
|
||||||
|
|
||||||
|
# CMS/OIG exports are not always clean UTF-8 (stray latin-1 bytes like 0xa0).
|
||||||
|
# Decode leniently so a few bad bytes don't abort a multi-hundred-MB load.
|
||||||
|
def open_csv(path):
    """Open the CSV file at *path* for text reading with lenient decoding.

    The file is decoded as "utf-8-sig", so a leading UTF-8 BOM is dropped
    when present. Byte sequences that are not valid UTF-8 are replaced with
    U+FFFD rather than raising UnicodeDecodeError. Newline translation is
    turned off (newline=""), which is what the csv module asks for.

    Returns the open text file object; the caller is responsible for
    closing it (e.g. by using it in a ``with`` statement).
    """
    decode_opts = {"encoding": "utf-8-sig", "errors": "replace"}
    return open(path, mode="r", newline="", **decode_opts)
|
||||||
|
|
||||||
|
|
||||||
def parse_date(s):
|
def parse_date(s):
|
||||||
if not s:
|
if not s:
|
||||||
|
|
@ -49,7 +54,7 @@ def clean_npi(s):
|
||||||
|
|
||||||
def load_revalidation(conn, path):
|
def load_revalidation(conn, path):
|
||||||
rows = []
|
rows = []
|
||||||
with open(path, newline="", encoding="utf-8-sig") as f:
|
with open_csv(path) as f:
|
||||||
for r in csv.DictReader(f):
|
for r in csv.DictReader(f):
|
||||||
npi = (r.get("National Provider Identifier") or "").strip()
|
npi = (r.get("National Provider Identifier") or "").strip()
|
||||||
if not (npi.isdigit() and len(npi) == 10):
|
if not (npi.isdigit() and len(npi) == 10):
|
||||||
|
|
@ -84,7 +89,7 @@ def load_revalidation(conn, path):
|
||||||
|
|
||||||
def load_exclusions(conn, path):
|
def load_exclusions(conn, path):
|
||||||
rows = []
|
rows = []
|
||||||
with open(path, newline="", encoding="utf-8-sig") as f:
|
with open_csv(path) as f:
|
||||||
for r in csv.DictReader(f):
|
for r in csv.DictReader(f):
|
||||||
rows.append((
|
rows.append((
|
||||||
clean_npi(r.get("NPI")),
|
clean_npi(r.get("NPI")),
|
||||||
|
|
@ -114,7 +119,7 @@ def load_exclusions(conn, path):
|
||||||
|
|
||||||
def load_optout(conn, path):
|
def load_optout(conn, path):
|
||||||
rows = []
|
rows = []
|
||||||
with open(path, newline="", encoding="utf-8-sig") as f:
|
with open_csv(path) as f:
|
||||||
for r in csv.DictReader(f):
|
for r in csv.DictReader(f):
|
||||||
npi = (r.get("npi") or r.get("NPI") or "").strip()
|
npi = (r.get("npi") or r.get("NPI") or "").strip()
|
||||||
if not (npi.isdigit() and len(npi) == 10):
|
if not (npi.isdigit() and len(npi) == 10):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue