Skip to content

Commit 74b2846

Browse files
authored
Add basilisp.url namespace for structured URL manipulation (#1240)
Fixes #1239
1 parent 6cb6729 commit 74b2846

File tree

4 files changed

+258
-0
lines changed

4 files changed

+258
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77
## [Unreleased]
88
### Added
99
* Added support for referring imported Python names as by `from ... import ...` (#1154)
10+
* Added the `basilisp.url` namespace for structured URL manipulation (#1239)
1011

1112
### Changed
1213
* Removed implicit support for single-use iterables in sequences, and introduced `iterator-seq` to explicitly handle them (#1192)

docs/api/url.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
basilisp.url
2+
============
3+
4+
.. toctree::
5+
:maxdepth: 2
6+
:caption: Contents:
7+
8+
.. autonamespace:: basilisp.url
9+
:members:
10+
:undoc-members:
11+
:exclude-members: ->URL, map->URL

src/basilisp/url.lpy

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
(ns basilisp.url
2+
"Namespace providing simple utility functions for creating and modifying URLs,
3+
wrapping functionality provided in :external:py:mod:`urllib.parse` in a more
4+
Clojure-like API.
5+
6+
The primary entrypoint function is :lpy:fn:`url` which will parse the input
7+
value into an :lpy:rec:`URL` record type, which allows for structured manipulation.
8+
Because the source type is a record, it can be manipulated using standard Clojure
9+
idioms such as :lpy:fn:`basilisp.core/assoc`.
10+
11+
After any modifications are made, :lpy:fn:`basilisp.core/str` can turn the URL
12+
record type into a string that can be used wherever string URLs are expected.
13+
14+
.. code-block::
15+
16+
(-> (url/url \"https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries\")
17+
(assoc :query {\"mobile\" [\"true\"]})
18+
(str))
19+
;; => \"https://en.wikipedia.org/wiki/Python_%28programming_language%29?mobile=true#Libraries\"
20+
21+
.. note::
22+
23+
Per the note in :external:py:func:`urllib.parse.urlunparse`, it is possible that
24+
round-trip return from this function is not identical to the input if no other
25+
changes are made to the URL parts, but the resulting URL should be equivalent.
26+
27+
.. warning::
28+
29+
No validation occurs creating a URL string from the provided URL record type,
30+
so users should take care to validate any components carefully before using
31+
these results.
32+
33+
To create a new URL record, :lpy:fn:`basilisp.core/assoc` keys onto
34+
:lpy:var:`blank-url`. This ensures that your URL has the correct defaults."
35+
(:import urllib.parse)
36+
(:require
37+
[basilisp.string :as str]))
38+
39+
;; Builds the authority portion of a URL string from the record's
;; :username, :password, :hostname, and :port fields.
(defn ^:private authority
40+
"Return a URL's authority (called ``netloc`` in :external:py:mod:`urllib.parse`),
41+
which consists of the 4 optional members: username, password, hostname, and port."
42+
[url]
43+
;; Credentials are emitted only if at least one of :username / :password is
;; truthy. Whichever of the two is missing is rendered as an empty string, so
;; the ":" separator is always present in `creds`. Both values are
;; percent-encoded with an empty `safe` set.
(let [creds (when (or (:username url) (:password url))
44+
(str/join ":" [(urllib.parse/quote (or (:username url) "") ** :safe "")
45+
(urllib.parse/quote (or (:password url) "") ** :safe "")]))
46+
;; A missing hostname falls back to the empty string.
hostname (or (:hostname url) "")
47+
;; The port is appended after a ":" only when :port is truthy.
host+port (if (:port url)
48+
(str/join ":" [hostname (:port url)])
49+
hostname)]
50+
;; Prefix the credentials (if any) to the host and port using "@".
(if creds
51+
(str/join "@" [creds host+port])
52+
host+port)))
53+
54+
(defn ^:private query-string
55+
"Return the URL-encoded query string for the ``:query`` element of a URL.
56+
The map of vectors is flattened into a Python list of key/value tuples (one tuple per value) and passed to :external:py:func:`urllib.parse.urlencode`."
57+
[url]
58+
(->> (:query url)
59+
;; Emit one Python (key, value) tuple for every value under each key.
(mapcat (fn [[k vs]]
60+
(map (fn [v] #py (k v)) vs)))
61+
;; Realize the lazy sequence as a Python list before encoding it.
(python/list)
62+
(urllib.parse/urlencode)))
63+
64+
;; Record holding the individual components of a URL. Its `__str__`
;; implementation reassembles those components into a URL string.
(defrecord URL [scheme username password hostname port path params query fragment]
65+
(__str__ [self]
66+
;; `parts` is a Python tuple of the six components handed to
;; `urllib.parse/urlunparse`: scheme, authority (netloc), path, params,
;; query, and fragment. The path is percent-encoded with
;; `urllib.parse/quote`; `scheme`, `params`, and `fragment` are passed
;; through exactly as stored on the record.
(let [parts #py (scheme
67+
(authority self)
68+
(urllib.parse/quote path)
69+
params
70+
(query-string self)
71+
fragment)]
72+
(urllib.parse/urlunparse parts))))
73+
74+
;; Protocol implemented by every type which can be converted into a `URL`
;; record; the public `url` function calls `to-url*` on its argument.
(defprotocol URLSource
75+
(to-url* [this]
76+
"Convert the input type to an :lpy:rec:`URL`."))
77+
78+
;; `URLSource` implementations for the two supported inputs: an already parsed
;; `urllib.parse/ParseResult`, and a plain Python string (which is parsed
;; first and then converted through the `ParseResult` implementation).
(extend-protocol URLSource
79+
urllib.parse/ParseResult
80+
(to-url* [this]
81+
;; Parse the raw query string with `parse-qs` and convert the result to
;; Basilisp data, leaving the keys as strings rather than keywords.
(let [query (-> (.-query this)
82+
(urllib.parse/parse-qs)
83+
(py->lisp :keywordize-keys false))]
84+
(->URL (.-scheme this)
85+
;; Username and password are percent-decoded only when present;
;; otherwise the field is left as nil.
(when-let [username (.-username this)]
86+
(urllib.parse/unquote username))
87+
(when-let [password (.-password this)]
88+
(urllib.parse/unquote password))
89+
(.-hostname this)
90+
(.-port this)
91+
;; The path is stored percent-decoded.
(urllib.parse/unquote (.-path this))
92+
(.-params this)
93+
query
94+
(.-fragment this))))
95+
96+
python/str
97+
(to-url* [this]
98+
;; Parse the string, then convert via the ParseResult implementation.
(to-url* (urllib.parse/urlparse this))))
99+
100+
(defn url
101+
"Construct an :lpy:rec:`URL` record from the input value (such as a string) as by
102+
:external:py:func:`urllib.parse.urlparse`.
103+
104+
:lpy:rec:`URL` types have the following fields which you can manipulate directly
105+
using :lpy:fn:`basilisp.core/assoc`. The default value for each field is an empty
106+
string unless otherwise noted.
107+
108+
* ``:scheme``
109+
* ``:username`` (default ``nil``)
110+
* ``:password`` (default ``nil``)
111+
* ``:hostname`` (default ``nil``)
112+
* ``:port`` (default ``nil``)
113+
* ``:path``
114+
* ``:params``
115+
* ``:query`` (default ``{}``)
116+
* ``:fragment``
117+
118+
.. note::
119+
120+
Component fields of what Python calls the ``netloc`` (\"network location\")
121+
must be ``nil`` to be excluded from the final URL output. Empty strings are
122+
not equivalent to ``nil``. These include ``:username``, ``:password``,
123+
``:hostname``, and ``:port``.
124+
125+
.. note::
126+
127+
The ``:query`` component should be a mapping of string keys to vectors of
128+
values:
129+
130+
.. code-block::
131+
132+
(:query (url/url \"http://localhost/some/path?a=1&a=2&b=3\"))
133+
;; => {\"b\" [\"3\"] \"a\" [\"1\" \"2\"]}
134+
135+
.. note::
136+
137+
``url`` always decodes percent-encoded ``:username``, ``:password``, ``:path``, and
138+
``:query`` values. Users should not attempt to URL encode values added to the
139+
:lpy:rec:`URL` object returned by that function. Converting the ``URL`` back into
140+
a string will URL encode those same fields.
141+
142+
.. warning::
143+
144+
Because this function relies on ``urllib.parse.urlparse``, it does not perform
145+
any validation of the input URLs and all the caveats of that function apply here."
146+
[url-str]
147+
;; Conversion is delegated to the `URLSource` protocol method `to-url*`.
(to-url* url-str))
148+
149+
(def blank-url
150+
"Blank :lpy:rec:`URL` type which can be used as a base for URL manipulation."
151+
;; Produced by parsing the empty string, so every field holds its default.
(url ""))
152+

tests/basilisp/test_url.lpy

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
(ns tests.basilisp.test-url
2+
(:require
3+
[basilisp.url :as url]
4+
[basilisp.test :refer [deftest is are testing]]))
5+
6+
;; Pins the default value of every field on `url/blank-url`.
(deftest blank-url-test
7+
(is (= url/blank-url
8+
(url/map->URL {:scheme ""
9+
:username nil
10+
:password nil
11+
:hostname nil
12+
:port nil
13+
:path ""
14+
:params ""
15+
:query {}
16+
:fragment "" }))))
17+
18+
;; Parsing tests: each case is an expected `URL` record followed by the URL
;; string which `url/url` should parse into that record. The cases cover
;; percent-decoding of the username, password, path, and query values.
(deftest url-test
19+
(are [res url-str] (= res (url/url url-str))
20+
(assoc url/blank-url
21+
:scheme "https"
22+
:hostname "en.wikipedia.org"
23+
:path "/wiki/Python_(programming_language)"
24+
:fragment "Libraries")
25+
"https://en.wikipedia.org/wiki/Python_(programming_language)#Libraries"
26+
27+
(assoc url/blank-url
28+
:scheme "http"
29+
:username "user name"
30+
:password "pass word"
31+
:hostname "localhost")
32+
"http://user%20name:pass%20word@localhost"
33+
34+
(assoc url/blank-url
35+
:scheme "http"
36+
:username ""
37+
:password "pass word"
38+
:hostname "localhost")
39+
"http://:pass%20word@localhost"
40+
41+
(assoc url/blank-url
42+
:scheme "http"
43+
:username "user name"
44+
:password ""
45+
:hostname "localhost")
46+
"http://user%20name:@localhost"
47+
48+
(assoc url/blank-url
49+
:scheme "http"
50+
:hostname "localhost"
51+
:path "/path with/some spaces/")
52+
"http://localhost/path%20with/some%20spaces/"
53+
54+
(assoc url/blank-url
55+
:scheme "http"
56+
:hostname "localhost"
57+
:path "/path/to/some/resource"
58+
:query {"arg" ["val with spaces"]})
59+
"http://localhost/path/to/some/resource?arg=val+with+spaces"))
60+
61+
;; Serialization tests for the authority portion of a URL: each assertion
;; modifies the credential and/or host fields of a base URL and checks the
;; string produced by `str`.
(deftest url-authority-test
62+
(let [base-url (url/url "http://localhost")]
63+
;; A missing username or password is rendered as an empty string around
;; the ":" separator; values are percent-encoded.
(testing "username and password"
64+
(is (= (str (assoc base-url :username "user"))
65+
"http://user:@localhost"))
66+
(is (= (str (assoc base-url :password "pass"))
67+
"http://:pass@localhost"))
68+
(is (= (str (assoc base-url :username "" :password ""))
69+
"http://:@localhost"))
70+
(is (= (str (assoc base-url :username "user" :password "pass"))
71+
"http://user:pass@localhost"))
72+
(is (= (str (assoc base-url :username "user name" :password "pass word"))
73+
"http://user%20name:pass%20word@localhost")))
74+
75+
;; A nil hostname still yields the ":port" suffix.
(testing "hostname and port"
76+
(is (= (str (assoc base-url :port 8080))
77+
"http://localhost:8080"))
78+
(is (= (str (assoc base-url :hostname nil :port 8080))
79+
"http://:8080"))
80+
(is (= (str (assoc base-url :hostname "chris-laptop.local" :port 8080))
81+
"http://chris-laptop.local:8080")))
82+
83+
;; All four authority members together; the non-ASCII username character
;; is percent-encoded as its UTF-8 bytes and spaces become %20.
(testing "username, password, hostname, and port"
84+
(is (= (str (assoc base-url
85+
:username "aîlene"
86+
:password "pass with space"
87+
:hostname "chris-laptop.local"
88+
:port 8080))
89+
"http://a%C3%AElene:pass%20with%20space@chris-laptop.local:8080")))))
90+
91+
;; Repeated query keys are collected into a single vector of values. The set
;; of acceptable results allows the values for "b" in either order.
(deftest url-query-string-test
92+
(is (contains? #{{"a" ["1"] "b" ["2" "3"]}
93+
{"a" ["1"] "b" ["3" "2"]}}
94+
(:query (url/url "http://localhost?a=1&b=2&b=3")))))

0 commit comments

Comments
 (0)