Skip to content

Commit 9baa1af

Browse files
hrm this is confusing
1 parent ada004f commit 9baa1af

File tree

3 files changed

+129
-63
lines changed

3 files changed

+129
-63
lines changed

.prettierignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tests/cassettes

clean_links/unshorten.py

+67-47
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import contextlib
2-
import logging
32
import warnings
43
from typing import Generator, Union
54

@@ -33,62 +32,83 @@ def disable_ssl_warnings() -> Generator:
3332
yield None
3433

3534

36-
def get_last_url_from_exception(exc: Exception) -> Union[str, None]:
37-
result = None
38-
35+
def send(
36+
session: requests.Session,
37+
prepped: requests.PreparedRequest,
38+
history: dict,
39+
verify: bool,
40+
timeout: float,
41+
) -> Union[requests.PreparedRequest, None]:
42+
history["requests"].append(prepped)
3943
try:
40-
if exc.response and exc.response.url:
41-
result = exc.response.url
42-
elif exc.request:
43-
result = exc.request.url
44-
except Exception as exc:
45-
logging.exception("exception occurred while getting last url")
44+
response = session.send(
45+
prepped, allow_redirects=False, verify=verify, timeout=timeout
46+
)
47+
except requests.exceptions.RequestException as exc:
48+
exc.history = history
49+
raise
50+
else:
51+
history["responses"].append(response)
52+
53+
return response.next
54+
55+
56+
def request_redirect_chain(
    session: requests.Session,
    url: str,
    verify: bool,
    timeout: float,
    headers: dict,
    method: str = "HEAD",
) -> dict:
    """Request ``url`` and follow its redirects one hop at a time.

    Every hop goes through ``send``, which records the prepared request and
    the response it received, so the returned history describes the whole
    chain rather than only its final response.

    Args:
        session: Session used to prepare and send every request.
        url: Starting URL.
        verify: Passed to ``send`` for each hop.
        timeout: Passed to ``send`` for each hop.
        headers: Headers for the initial request.
        method: HTTP method for the initial request.

    Returns:
        A dict with two lists, ``"requests"`` and ``"responses"``, in the
        order the hops were made.

    Raises:
        requests.exceptions.RequestException: Propagated from ``send``, which
            attaches the partial history to the exception as ``history``.
        requests.exceptions.TooManyRedirects: When the chain needs more than
            ``session.max_redirects`` redirects; the partial history is
            attached as ``history`` in the same way.
    """
    history: dict = {
        "requests": [],
        "responses": [],
    }

    # prepare initial request
    request = requests.Request(method, url, headers=headers)
    prepped = session.prepare_request(request)

    # send and follow the redirect chain, filling in the history
    next_prepped = send(session, prepped, history, verify, timeout)
    while next_prepped is not None:
        # The first response is not a redirect hop, hence ">" rather than
        # ">=". Without this bound a redirect cycle would never terminate.
        if len(history["responses"]) > session.max_redirects:
            exc = requests.exceptions.TooManyRedirects(
                f"Exceeded {session.max_redirects} redirects.",
                response=history["responses"][-1],
            )
            exc.history = history
            raise exc
        # Follow the redirect: send the request the previous hop pointed to,
        # not the initial one again.
        next_prepped = send(session, next_prepped, history, verify, timeout)

    return history
4679

47-
return result
80+
81+
def format_exception(exc: Union[Exception, None]) -> Union[str, None]:
    """Render an exception as ``"<ClassName>: <message>"``.

    Args:
        exc: The exception to describe, or None when nothing was raised.

    Returns:
        None when ``exc`` is None; otherwise the exception's class name and
        its ``str()`` text joined by ``": "``.
    """
    if exc is None:
        return None
    return f"{type(exc).__name__}: {exc}"
4886

4987

5088
def unshorten_url(
51-
url: str, timeout: int = 9, verify: bool = False, headers: dict = HEADERS
89+
url: str, timeout: float = 9, verify: bool = False, headers: dict = HEADERS
5290
) -> dict:
5391
with requests.Session() as session, disable_ssl_warnings():
92+
exception = None
5493
try:
55-
response = session.head(
56-
url,
57-
allow_redirects=True,
58-
timeout=timeout,
59-
headers=headers,
60-
verify=verify,
94+
history = request_redirect_chain(
95+
session, url, verify, timeout, headers, "HEAD"
6196
)
62-
except requests.exceptions.MissingSchema:
63-
raise
64-
except requests.exceptions.InvalidURL:
65-
raise
66-
except requests.exceptions.InvalidSchema as exc:
67-
msg = str(exc)
68-
if msg.startswith("No connection adapters were found"):
69-
resolved = msg[39:-1]
70-
return {
71-
"url": url,
72-
"resolved": resolved,
73-
"status": None,
74-
"exception": f"{type(exc).__name__}: {exc}",
75-
}
76-
else:
77-
raise
7897
except requests.exceptions.RequestException as exc:
79-
return {
80-
"url": url,
81-
"resolved": get_last_url_from_exception(exc),
82-
"status": None,
83-
"exception": f"{type(exc).__name__}: {exc}",
84-
}
85-
else:
86-
return {
87-
"url": url,
88-
"resolved": response.url,
89-
"status": response.status_code,
90-
"exception": None,
91-
}
98+
exception = exc
99+
history = getattr(exc, "history", {})
100+
if not history or not history["responses"]:
101+
raise
102+
103+
response = history["responses"][-1]
104+
return {
105+
"url": url,
106+
"resolved": response.url,
107+
"status": response.status_code,
108+
"exception": format_exception(exception),
109+
"request_history": [r.url for r in history["requests"]],
110+
"response_history": [r.status_code for r in history["responses"]],
111+
}
92112

93113

94114
def main() -> None:

tests/test_unshorten.py

+61-16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,18 @@
44
from clean_links.unshorten import unshorten_url
55

66

7+
def test_missing_schema():
    """unshorten_url raises MissingSchema for text that is not a URL."""
    not_a_url = "Ceci n'est pas une URL"
    with pytest.raises(requests.exceptions.MissingSchema):
        unshorten_url(not_a_url)
11+
12+
13+
def test_not_an_address():
    """unshorten_url raises ConnectionError for a made-up host name."""
    made_up_host = "https://www.definitely-not-a-website.boogety"
    with pytest.raises(requests.exceptions.ConnectionError):
        unshorten_url(made_up_host)
17+
18+
719
# cassettes/{module_name}/test_single.yaml will be used
820
@pytest.mark.vcr
921
def test_unchanged():
@@ -14,65 +26,98 @@ def test_unchanged():
1426
"resolved": "https://example.com/",
1527
"status": 200,
1628
"exception": None,
29+
"request_history": ["https://example.com/"],
30+
"response_history": [200],
1731
}
1832

1933

2034
@pytest.mark.vcr
2135
def test_single_redirect():
2236
url = "https://trib.al/5m7fAg3"
2337
result = unshorten_url(url)
38+
resolved = "https://www.bloomberg.com/news/articles/2024-01-24/cryptocurrency-ai-electricity-demand-seen-doubling-in-three-years?cmpid%3D=socialflow-twitter-tech&utm_content=tech&utm_medium=social&utm_campaign=socialflow-organic&utm_source=twitter"
2439
assert result == {
2540
"url": url,
26-
"resolved": "https://www.bloomberg.com/news/articles/2024-01-24/cryptocurrency-ai-electricity-demand-seen-doubling-in-three-years?cmpid%3D=socialflow-twitter-tech&utm_content=tech&utm_medium=social&utm_campaign=socialflow-organic&utm_source=twitter",
41+
"resolved": resolved,
2742
"status": 200,
2843
"exception": None,
44+
"request_history": [url, resolved],
45+
"response_history": [301, 200],
2946
}
3047

3148

3249
@pytest.mark.vcr
3350
def test_multiple_redirect():
3451
url = "https://hubs.la/Q01HRjhm0"
3552
result = unshorten_url(url)
53+
resolved = "https://app.east.mentorspaces.com/#!/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D"
3654
assert result == {
3755
"url": url,
38-
"resolved": "https://app.east.mentorspaces.com/#!/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D",
56+
"resolved": resolved,
3957
"status": 200,
4058
"exception": None,
59+
"request_history": [
60+
url,
61+
"https://mentorspaces.app.link/nsbe?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094",
62+
"https://app.east.mentorspaces.com/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D",
63+
"http://app.east.mentorspaces.com/#!/orgs/6aab4989-2bd1-7ec9-6e3f-56f3128815c8/?utm_content=242450506&utm_medium=social&utm_source=twitter&hss_channel=tw-18419094&_branch_match_id=1280732352261121106&utm_campaign=fall-recruiting&_branch_referrer=H4sIAAAAAAAAAx3KUQqEIBAA0Nv0V1rYsgXSUcJmB5R0FGek61f7%2B3hepPCqVEKSXLk4QB5cKUMMdCriA7cmaYdM8gw7mcnMetaf7tWEv9CS5QzBxb9wbhXQyhVEsHaeeQfviDA%2B1o9fMy56MTdcnGABdQAAAA%3D%3D",
64+
resolved,
65+
],
66+
"response_history": [301, 307, 301, 301, 200],
4167
}
4268

4369

4470
@pytest.mark.vcr
4571
def test_expired_certificate_ignore():
4672
url = "https://expired.badssl.com/"
4773
result = unshorten_url(url, verify=False)
74+
resolved = "https://expired.badssl.com/"
4875
assert result == {
4976
"url": url,
50-
"resolved": "https://expired.badssl.com/",
77+
"resolved": resolved,
5178
"status": 200,
5279
"exception": None,
80+
"request_history": [url],
81+
"response_history": [200],
5382
}
5483

5584

5685
@pytest.mark.vcr
5786
def test_resolve_to_mailto():
5887
url = "https://tinyurl.com/NewwAlemAndKibrom"
5988
result = unshorten_url(url)
89+
resolved = "https://tinyurl.com/NewwAlemAndKibrom"
6090
assert result["url"] == url
61-
assert (
62-
result["resolved"]
63-
== "mailto:[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected]?subject=URGENT%20CALL%20TO%20ACTION%3A%20Save%20Alem%20and%20Kibrom's%20lives&body=To%20Whom%20it%20May%20Concern%3A%0A%0AAlem%20Tesfay%20Abraham%20and%20Kibrom%20Adhanom%20Okbazghi%20are%20two%20Eritrean%20asylum-seekers%20who%20have%20been%20detained%20without%20charge%20in%20Egypt%20since%202012%20and%202014%2C%20respectively.%20They%20now%20are%20facing%20deportation%20to%20Eritrea%20without%20ever%20receiving%20the%20opportunity%20to%20register%20as%20refugees%20with%20UNHCR%20in%20Egypt.%20On%209%20September%2C%20they%20were%20taken%20from%20prison%20to%20a%20hospital%20in%20Cairo%20to%20take%20PCR%20tests%20and%20were%20informed%20by%20a%20prison%20official%20that%20they%20would%20be%20deported%20to%20Eritrea%20on%20the%20oncoming%20days.%0A%0AForcibly%20returning%20Alem%20and%20Kibrom%20to%20Eritrea%2C%20where%20they%20fled%20indefinite%20military%20conscription%20and%20where%20they%20would%20face%20persecution%2C%20is%20a%20grave%20breach%20of%20international%20law.%20Eritrean%20asylum-seekers%20who%20are%20forcibly%20returned%20to%20Eritrea%20risk%20arbitrary%20arrest%2C%20forced%20disappearance%20and%20indefinite%20detention%20without%20charges.%20As%20widely%20documented%20by%20many%20NGOs%20as%20well%20as%20the%20UN%20Human%20Rights%20
Council%2C%20citizens%20in%20Eritrea%20are%20held%20in%20prisons%20incommunicado%2C%20in%20unsanitary%20living%20conditions%2C%20where%20torture%20and%20other%20ill%20treatments%20are%20taking%20place%20to%20present.%0A%0AForcing%20Alem%20and%20Kibrom%20back%20to%20the%20nation%20they%20are%20seeking%20asylum%20from%20violates%20the%201951%20Convention%20and%201967%20Protocol%2C%20two%20International%20Laws%20Egypt%20has%20agreed%20to.%20They%20deserve%20the%20right%20to%20be%20resettled%20by%20will%2C%20to%20a%20country%20willing%20to%20accept%20them.%20We%20urge%20you%2C%20the%20Egyptian%20authorities%2C%20and%20all%20other%20relevant%20bodies%2C%20to%20help%20stop%20the%20forced%20repatriation%20of%20Alem%20and%20Kibrom%20and%20protect%20them%20from%20persecution%20and%20grant%20them%20their%20long-awaited%20freedom.%20%0A%0A%23JusticeforAlemAndKibrom%0A%0ASincerely%2C"
64-
)
65-
assert result["status"] is None
66-
assert (
67-
result["exception"]
68-
== 'InvalidSchema: No connection adapters were found for "mailto:[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected],%[email protected]?subject=URGENT%20CALL%20TO%20ACTION%3A%20Save%20Alem%20and%20Kibrom\'s%20lives&body=To%20Whom%20it%20May%20Concern%3A%0A%0AAlem%20Tesfay%20Abraham%20and%20Kibrom%20Adhanom%20Okbazghi%20are%20two%20Eritrean%20asylum-seekers%20who%20have%20been%20detained%20without%20charge%20in%20Egypt%20since%202012%20and%202014%2C%20respectively.%20They%20now%20are%20facing%20deportation%20to%20Eritrea%20without%20ever%20receiving%20the%20opportunity%20to%20register%20as%20refugees%20with%20UNHCR%20in%20Egypt.%20On%209%20September%2C%20they%20were%20taken%20from%20prison%20to%20a%20hospital%20in%20Cairo%20to%20take%20PCR%20tests%20and%20were%20informed%20by%20a%20prison%20official%20that%20they%20would%20be%20deported%20to%20Eritrea%20on%20the%20oncoming%20days.%0A%0AForcibly%20returning%20Alem%20and%20Kibrom%20to%20Eritrea%2C%20where%20they%20fled%20indefinite%20military%20conscription%20and%20where%20they%20would%20face%20persecution%2C%20is%20a%20grave%20breach%20of%20international%20law.%20Eritrean%20asylum-seekers%20who%20are%20forcibly%20returned%20to%20Eritrea%20risk%20arbitrary%20arrest%2C%20forced%20disappearance%20and%20indefinite%20detention%20without%20charges.%20As%20widely%20documented%20by%20many
%20NGOs%20as%20well%20as%20the%20UN%20Human%20Rights%20Council%2C%20citizens%20in%20Eritrea%20are%20held%20in%20prisons%20incommunicado%2C%20in%20unsanitary%20living%20conditions%2C%20where%20torture%20and%20other%20ill%20treatments%20are%20taking%20place%20to%20present.%0A%0AForcing%20Alem%20and%20Kibrom%20back%20to%20the%20nation%20they%20are%20seeking%20asylum%20from%20violates%20the%201951%20Convention%20and%201967%20Protocol%2C%20two%20International%20Laws%20Egypt%20has%20agreed%20to.%20They%20deserve%20the%20right%20to%20be%20resettled%20by%20will%2C%20to%20a%20country%20willing%20to%20accept%20them.%20We%20urge%20you%2C%20the%20Egyptian%20authorities%2C%20and%20all%20other%20relevant%20bodies%2C%20to%20help%20stop%20the%20forced%20repatriation%20of%20Alem%20and%20Kibrom%20and%20protect%20them%20from%20persecution%20and%20grant%20them%20their%20long-awaited%20freedom.%20%0A%0A%23JusticeforAlemAndKibrom%0A%0ASincerely%2C"'
69-
)
91+
assert result["resolved"] == resolved
92+
assert result["status"] == 301
93+
assert result["exception"].startswith("InvalidSchema: No connection adap")
94+
assert result["request_history"][0] == url
95+
assert result["request_history"][1].startswith("mailto:[email protected]")
96+
assert result["response_history"] == [301]
7097

7198

72-
def test_missing_schema():
73-
url = "I AM NOT AN URL"
74-
with pytest.raises(requests.exceptions.MissingSchema):
75-
unshorten_url(url)
99+
@pytest.mark.vcr
def test_invalid_url_in_redirect_chain():
    """Pin the result when a redirect chain leads to an invalid URL.

    Open design question: should this raise, as it would if the starting URL
    were invalid, or report the last valid URL in the chain? The expectation
    below is the latter.
    """
    url = "https://ctt.ec/5kum7+"
    last_valid = "https://clicktotweet.com/5kum7+"
    expected = {
        "url": url,
        "resolved": last_valid,
        "status": 302,
        "exception": "InvalidURL: No host specified.",
        "request_history": [url, last_valid, "http://"],
        "response_history": [301, 302],
    }
    assert unshorten_url(url) == expected
76121

77122

78123
# def test_expired_certificate_verify():

0 commit comments

Comments
 (0)