"""Unit tests for the healthcare email-stream classifier.

Run: python3 -m scripts.test_healthcare_email_streams   (or pytest)

Guards the subtle case that motivated the two-tier DIRECT detection: a naive
substring "direct" match wrongly parked real "Direct Primary Care" / counseling
practices (registrable domain merely contains the word) into the undeliverable
HISP pile. Direct/HISP gateways instead use "direct"/"hisp" as a whole DNS label.
"""

from scripts.healthcare_email_streams import classify

# Each entry is (email address, expected return value of classify()).
CASES = [
    # Real Direct-Primary-Care / counseling practices -> institutional
    ("chelsea@arthurdirectcare.com", "institutional"),
    ("bassi@valleydirectprimarycare.com", "institutional"),
    ("megan@newdirectionscounselingservices.com", "institutional"),
    ("john@islanddirectprimarycare.com", "institutional"),
    ("kamlesh@mydirectcare.com", "institutional"),
    ("allison@truedirectioncounseling.com", "institutional"),
    ("marty@holtondirectcare.com", "institutional"),
    ("sbass@newdirectionsnonemergency.org", "institutional"),
    ("info@consumerdirectcare.com", "institutional"),
    ("x@rehabdirectives.com", "institutional"),
    # Genuine Direct/HISP gateways -> direct (parked)
    ("x@direct.novanthealth.org", "direct"),
    ("x@CarolinasHealthcareSystem.direct-ci.com", "direct"),
    ("x@cfp.directbygreenway.com", "direct"),
    ("x@foo.4693.direct.athenahealth.com", "direct"),
    ("x@directHISP.wakemed.org", "direct"),
    ("x@boss.directak.net", "direct"),
    ("x@hisp.bryanhealth.org", "direct"),
    ("x@ehrdirect.mayoclinicmsg.org", "direct"),
    ("x@directaddress.net", "direct"),
    ("x@negaidx.allscriptsdirect.net", "direct"),
    # The expression below evaluates to "x@lickingmemorial.medicity.net".
    ("x@mmiller@lickingmemorial.medicity.net".replace("mmiller@", ""), "direct"),
    ("x@foo.nextgenshare.com", "direct"),
    # Consumer / institutional / excluded / invalid
    ("drsmith@gmail.com", "consumer"),
    ("info@smallclinic.com", "institutional"),
    ("x@somehospital.va.gov", "excluded"),
    ("x@base.health.mil", "excluded"),
    ("not-an-email", "invalid"),
    ("", "invalid"),
]


def test_classify():
    """Check classify() against every entry in CASES.

    All cases are evaluated before asserting, so a failing run reports every
    misclassified address in one message rather than stopping at the first.
    """
    # Pair each address with what classify() returned and what was expected.
    outcomes = [(address, classify(address), wanted) for address, wanted in CASES]
    mismatches = [row for row in outcomes if row[1] != row[2]]
    report = "; ".join(f"{e} -> {g} (want {x})" for e, g, x in mismatches)
    assert not mismatches, "Misclassified: " + report


if __name__ == "__main__":
    # Script mode: run the single test and print a summary on success.
    test_classify()
    print(f"OK: all {len(CASES)} classifier cases pass")