Skip to content
31 changes: 14 additions & 17 deletions httpx/_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,18 @@ def __str__(self) -> str:
)


def _check_ascii_printable(url: str, key: str | None = None) -> None:
if any(char.isascii() and not char.isprintable() for char in url):
char = next(char for char in url if char.isascii() and not char.isprintable())
idx = url.find(char)
component = f" {key} component" if key else ""
error = (
f"Invalid non-printable ASCII character in URL{component},"
f" {char!r} at position {idx}."
)
raise InvalidURL(error)


def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
# Initial basic checks on allowable URLs.
# ---------------------------------------
Expand All @@ -220,13 +232,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:

# If a URL includes any ASCII control characters including \t, \r, \n,
# then treat it as invalid.
if any(char.isascii() and not char.isprintable() for char in url):
char = next(char for char in url if char.isascii() and not char.isprintable())
idx = url.find(char)
error = (
f"Invalid non-printable ASCII character in URL, {char!r} at position {idx}."
)
raise InvalidURL(error)
_check_ascii_printable(url)

# Some keyword arguments require special handling.
# ------------------------------------------------
Expand Down Expand Up @@ -270,16 +276,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:

# If a component includes any ASCII control characters including \t, \r, \n,
# then treat it as invalid.
if any(char.isascii() and not char.isprintable() for char in value):
char = next(
char for char in value if char.isascii() and not char.isprintable()
)
idx = value.find(char)
error = (
f"Invalid non-printable ASCII character in URL {key} component, "
f"{char!r} at position {idx}."
)
raise InvalidURL(error)
_check_ascii_printable(value, key)

# Ensure that keyword arguments match as a valid regex.
if not COMPONENT_REGEX[key].fullmatch(value):
Expand Down