diff --git a/scripts/_email_plaintext.py b/scripts/_email_plaintext.py
index 3fa7f99..4980d9d 100644
--- a/scripts/_email_plaintext.py
+++ b/scripts/_email_plaintext.py
@@ -39,9 +39,13 @@ _DROP_BLOCKS = re.compile(
)
# HTML comments (Listmonk/MSO conditional comments etc.).
_COMMENTS = re.compile(r"", _RE_FLAGS)
-# TEXT -> TEXT (URL) (skip mailto:/tel:/anchors/templated)
+# <a href="URL">TEXT</a> -> TEXT (URL) (skip mailto:/tel:/anchors)
+# Handles quoted ("..." / '...') and unquoted (href=URL) hrefs.
_ANCHORS = re.compile(
- r']*?\bhref\s*=\s*["\']([^"\']+)["\'][^>]*>(.*?)', _RE_FLAGS
+ r']*?\bhref\s*=\s*'
+ r'(?:"([^"]*)"|\'([^\']*)\'|([^\s">]+))'
+ r'[^>]*>(.*?)',
+ _RE_FLAGS,
)
# Tags that should become a line break.
_BR = re.compile(r"
", re.IGNORECASE)
@@ -58,8 +62,9 @@ _MANY_SPACES = re.compile(r"[ \t]{2,}")
def _anchor_repl(m: "re.Match[str]") -> str:
- url = m.group(1).strip()
- text = _ANY_TAG.sub("", m.group(2)).strip()
+ # href is whichever of the 3 alternatives matched (double/single/unquoted).
+ url = (m.group(1) or m.group(2) or m.group(3) or "").strip()
+ text = _ANY_TAG.sub("", m.group(4)).strip()
low = url.lower()
# mailto:/tel: -> surface the address (with link text if it adds info).
# Bare in-page anchors -> keep text only. Templated hrefs (e.g.
diff --git a/scripts/test_email_plaintext.py b/scripts/test_email_plaintext.py
new file mode 100644
index 0000000..82432e6
--- /dev/null
+++ b/scripts/test_email_plaintext.py
@@ -0,0 +1,86 @@
+"""Tests for scripts/_email_plaintext.html_to_text (campaign altbody generation).
+
+Run: python3 -m pytest scripts/test_email_plaintext.py
+ or: python3 scripts/test_email_plaintext.py
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from scripts._email_plaintext import html_to_text # noqa: E402
+
+
+def test_empty_input():
+ assert html_to_text("") == ""
+ assert html_to_text(None) == "" # type: ignore[arg-type]
+
+
+def test_strips_script_style_head():
+ html = "
Hello {{ .Subscriber.Attribs.company }}
" + 'unsubscribe' + ) + out = html_to_text(html) + assert "{{ .Subscriber.Attribs.company }}" in out + assert "{{ UnsubscribeURL }}" in out + + +def test_lists_become_dashes(): + out = html_to_text("A
| C |
Tom & Jerry's
") + + +def test_collapses_blank_lines(): + out = html_to_text("A
B
") + assert "\n\n\n" not in out + + +if __name__ == "__main__": + fns = [v for k, v in sorted(globals().items()) if k.startswith("test_")] + for fn in fns: + fn() + print(f"PASS {fn.__name__}") + print(f"\nAll {len(fns)} tests passed.")