diff --git a/scripts/workers/services/telecom/undetected_browser.py b/scripts/workers/services/telecom/undetected_browser.py
index 79ef4f0..a23037a 100644
--- a/scripts/workers/services/telecom/undetected_browser.py
+++ b/scripts/workers/services/telecom/undetected_browser.py
@@ -110,17 +110,42 @@ def _proxy_config(proxy_env: str = "UNDETECTED_PROXY_URL") -> dict | None:
     if not url:
         return None
 
-    # Playwright's proxy dict supports: server, username, password, bypass
-    from urllib.parse import urlparse
+    # Parse manually so credentials may contain URL-reserved characters such
+    # as '#', '@' or '/' (and ':' in the password). urlparse() chokes on those
+    # (e.g. a '#' in the password is misread as a fragment, corrupting the
+    # port), so we split the ``scheme://creds@host:port`` shape ourselves and
+    # percent-decode the username/password. Credentials may be stored raw OR
+    # percent-encoded (e.g. '#' as '%23') in the env. Caveat: unquote() decodes
+    # every valid '%XX' sequence, so a literal '%' that is followed by two hex
+    # digits must be stored as '%25'.
+    from urllib.parse import unquote
 
-    parsed = urlparse(url)
-    server = f"{parsed.scheme}://{parsed.hostname}"
-    if parsed.port:
-        server += f":{parsed.port}"
+    rest = url
+    scheme = ""
+    if "://" in rest:
+        scheme, rest = rest.split("://", 1)
 
-    cfg: dict = {"server": server}
-    if parsed.username:
-        cfg["username"] = parsed.username
-    if parsed.password:
-        cfg["password"] = parsed.password
+    creds = ""
+    hostport = rest
+    if "@" in rest:
+        # rsplit so an '@' inside the password doesn't split the host off early
+        creds, hostport = rest.rsplit("@", 1)
+
+    # Keep only host[:port] in ``server``, as the urlparse-based version did;
+    # otherwise a trailing '/', path, query or fragment in the env value would
+    # leak into it.
+    for sep in "/?#":
+        hostport = hostport.split(sep, 1)[0]
+
+    cfg: dict = {"server": f"{scheme}://{hostport}" if scheme else hostport}
+
+    if creds:
+        if ":" in creds:
+            user, pw = creds.split(":", 1)
+        else:
+            user, pw = creds, ""
+        if user:
+            cfg["username"] = unquote(user)
+        if pw:
+            cfg["password"] = unquote(pw)
     return cfg