"""Unit tests for the healthcare email-stream classifier.

Run: python3 -m scripts.test_healthcare_email_streams  (or pytest)

Guards the subtle case that motivated the two-tier DIRECT detection: a naive
substring "direct" match wrongly parked real "Direct Primary Care" / counseling
practices (registrable domain merely contains the word) into the undeliverable
HISP pile. Direct/HISP gateways instead use "direct"/"hisp" as a whole DNS label.
"""
|
|
|
|
from scripts.healthcare_email_streams import classify
|
|
|
|
# Table of (email address, expected label) pairs consumed by test_classify().
# Each entry is compared against the value returned by classify(); the group
# comments state which behaviour that group is meant to pin down.
CASES = [
    # Real Direct-Primary-Care / counseling practices -> institutional
    # ("direct" appears only as a substring of the practice's own domain name).
    ("chelsea@arthurdirectcare.com", "institutional"),
    ("bassi@valleydirectprimarycare.com", "institutional"),
    ("megan@newdirectionscounselingservices.com", "institutional"),
    ("john@islanddirectprimarycare.com", "institutional"),
    ("kamlesh@mydirectcare.com", "institutional"),
    ("allison@truedirectioncounseling.com", "institutional"),
    ("marty@holtondirectcare.com", "institutional"),
    ("sbass@newdirectionsnonemergency.org", "institutional"),
    ("info@consumerdirectcare.com", "institutional"),
    ("x@rehabdirectives.com", "institutional"),
    # Genuine Direct/HISP gateways -> direct (parked)
    # NOTE(review): the mixed-case hosts below presumably exercise
    # case-insensitive matching -- confirm against classify().
    ("x@direct.novanthealth.org", "direct"),
    ("x@CarolinasHealthcareSystem.direct-ci.com", "direct"),
    ("x@cfp.directbygreenway.com", "direct"),
    ("x@foo.4693.direct.athenahealth.com", "direct"),
    ("x@directHISP.wakemed.org", "direct"),
    ("x@boss.directak.net", "direct"),
    ("x@hisp.bryanhealth.org", "direct"),
    ("x@ehrdirect.mayoclinicmsg.org", "direct"),
    ("x@directaddress.net", "direct"),
    ("x@negaidx.allscriptsdirect.net", "direct"),
    # Plain literal; previously assembled at import time via
    # "x@mmiller@...".replace("mmiller@", ""), which yields this same string.
    ("x@lickingmemorial.medicity.net", "direct"),
    ("x@foo.nextgenshare.com", "direct"),
    # Consumer / institutional / excluded / invalid
    ("drsmith@gmail.com", "consumer"),
    ("info@smallclinic.com", "institutional"),
    ("x@somehospital.va.gov", "excluded"),
    ("x@base.health.mil", "excluded"),
    ("not-an-email", "invalid"),
    ("", "invalid"),
]
|
|
|
|
|
|
def test_classify():
    """Check every (email, expected) pair in CASES against classify().

    All cases are evaluated before asserting, so a single failing run reports
    every misclassified address at once rather than stopping at the first.
    """
    # One classify() call per case, in table order.
    outcomes = [(address, classify(address), wanted) for address, wanted in CASES]
    mismatches = [row for row in outcomes if row[1] != row[2]]
    assert not mismatches, "Misclassified: " + "; ".join(
        f"{address} -> {actual} (want {wanted})"
        for address, actual, wanted in mismatches
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: lets the table-driven check run without pytest.
    # test_classify() raises AssertionError on any mismatch, so the summary
    # line below is only reached when it returns normally.
    test_classify()
    print(f"OK: all {len(CASES)} classifier cases pass")
|