The anchor regex only matched quoted hrefs, so an unquoted link (href=URL) lost its URL in the plaintext part. It now handles double-quoted, single-quoted, and unquoted hrefs. Added scripts/test_email_plaintext.py (11 cases: link forms, mailto, template-tag preservation, tag stripping, entity unescaping, blank-line collapsing).
86 lines
2.5 KiB
Python
86 lines
2.5 KiB
Python
"""Tests for scripts/_email_plaintext.html_to_text (campaign altbody generation).
|
|
|
|
Run: python3 -m pytest scripts/test_email_plaintext.py
|
|
or: python3 scripts/test_email_plaintext.py
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
from scripts._email_plaintext import html_to_text # noqa: E402
|
|
|
|
|
|
def test_empty_input():
    """Both the empty string and ``None`` must convert to an empty string."""
    for blank in ("", None):
        assert html_to_text(blank) == ""  # type: ignore[arg-type]
|
|
|
|
|
|
def test_strips_script_style_head():
    """Style rules and the title inside ``<head>`` are dropped; body text stays."""
    markup = "<head><style>.x{color:red}</style><title>t</title></head><body>Hi</body>"
    text = html_to_text(markup)

    assert "color:red" not in text

    rendered_lines = text.splitlines()
    if rendered_lines:
        # The title text ("t") must not show up on the first output line.
        assert "t" not in rendered_lines[0]

    assert text.strip() == "Hi"
|
|
|
|
|
|
def test_links_quoted_double():
    """A double-quoted href is surfaced as ``label (url)``."""
    rendered = html_to_text('<a href="https://performancewest.net/m?dot=1">Check</a>')
    expected = "Check (https://performancewest.net/m?dot=1)"
    assert expected in rendered
|
|
|
|
|
|
def test_links_quoted_single():
    """A single-quoted href is surfaced as ``label (url)``."""
    rendered = html_to_text("<a href='https://x.io'>Check</a>")
    expected = "Check (https://x.io)"
    assert expected in rendered
|
|
|
|
|
|
def test_links_unquoted_with_attrs():
    """An unquoted href followed by another attribute still yields ``label (url)``."""
    rendered = html_to_text("<a href=https://x.io target=_blank>Check</a>")
    expected = "Check (https://x.io)"
    assert expected in rendered
|
|
|
|
|
|
def test_mailto_surfaces_address():
    """A ``mailto:`` link shows the bare address in parentheses after the label."""
    rendered = html_to_text('<a href="mailto:info@performancewest.net">Email us</a>')
    expected = "Email us (info@performancewest.net)"
    assert expected in rendered
|
|
|
|
|
|
def test_preserves_listmonk_template_tags():
    """``{{ ... }}`` template placeholders survive in body text and in an href."""
    placeholders = ("{{ .Subscriber.Attribs.company }}", "{{ UnsubscribeURL }}")
    markup = "".join(
        [
            "<p>Hello {{ .Subscriber.Attribs.company }}</p>",
            '<a href="{{ UnsubscribeURL }}">unsubscribe</a>',
        ]
    )
    text = html_to_text(markup)
    for placeholder in placeholders:
        assert placeholder in text
|
|
|
|
|
|
def test_lists_become_dashes():
    """Each ``<li>`` item is rendered with a leading ``- `` marker."""
    text = html_to_text("<ul><li>One</li><li>Two</li></ul>")
    assert "- One" in text
    assert "- Two" in text
|
|
|
|
|
|
def test_no_tags_leak():
    """No angle brackets remain after converting nested block, inline and table markup."""
    markup = "<div><p>A</p><br><span>B</span></div><table><tr><td>C</td></tr></table>"
    text = html_to_text(markup)
    assert not any(bracket in text for bracket in ("<", ">"))
|
|
|
|
|
|
def test_entities_unescaped():
    """HTML character references are decoded in the plaintext output.

    The input must actually contain entities, otherwise the assertion passes
    without any unescaping taking place. It uses one named reference
    (``&amp;``) and one numeric reference (``&#39;``) so both forms are
    exercised.
    """
    assert "Tom & Jerry's" in html_to_text("<p>Tom &amp; Jerry&#39;s</p>")
|
|
|
|
|
|
def test_collapses_blank_lines():
    """Empty paragraphs never produce three consecutive newlines in the output."""
    text = html_to_text("<p>A</p><p></p><p></p><p>B</p>")
    triple_newline = "\n\n\n"
    assert triple_newline not in text
|
|
|
|
|
|
if __name__ == "__main__":
    # Plain-script runner: call every module-level ``test_*`` object in
    # name order; the first failing assertion aborts the run.
    namespace = globals()
    selected = sorted(key for key in namespace if key.startswith("test_"))
    for key in selected:
        case = namespace[key]
        case()
        print(f"PASS {case.__name__}")
    print(f"\nAll {len(selected)} tests passed.")
|