-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch-assets.py
executable file
·86 lines (64 loc) · 2.33 KB
/
fetch-assets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
import hashlib
import os
import re
import sys
from urllib.parse import urlparse
import requests
import yaml
# Root of the Developers Italia API; endpoint paths are appended to this URL.
API_BASE_URL = "https://api.developers.italia.it/v1"
def absolute_url(url, repo):
    """Resolve a repository-relative file path to a raw-content URL.

    Args:
        url: Path of a file inside the repository, or an already absolute
            ``http(s)://`` URL. Falsy values are returned unchanged.
        repo: URL of the repository the path belongs to. It is only
            inspected when ``url`` is relative.

    Returns:
        ``url`` itself when it is empty or already absolute; otherwise the
        URL serving the file at ``HEAD`` on GitHub, Bitbucket, or (for any
        other host) a GitLab-style instance.
    """
    # Decide first whether the repository is needed at all, so an absolute
    # URL never fails because of a missing or malformed repository value.
    if not url or url.lower().startswith(("http://", "https://")):
        return url

    # The dot is escaped so that only a literal ".git" suffix is removed;
    # an unescaped '.' would also eat the tail of names such as "legit".
    repo = re.sub(r"\.git$", "", repo.lower().rstrip("/"))
    repo_url = urlparse(repo)
    # A repository value without a hostname raises AttributeError here.
    hostname = repo_url.hostname.lower()

    # Build the path with explicit '/' separators: os.path.join would drop
    # the repository path when the file path starts with '/', and would use
    # the platform separator instead of the URL one.
    repo_path = repo_url.path.rstrip("/")
    file_path = url.lstrip("/")

    if hostname == "github.com":
        return f"https://raw.githubusercontent.com{repo_path}/HEAD/{file_path}"
    if hostname == "bitbucket.org":
        return f"https://bitbucket.org{repo_path}/raw/HEAD/{file_path}"
    # Anything else is treated as GitLab.
    return f"{repo_url.scheme}://{repo_url.hostname}{repo_path}/-/raw/HEAD/{file_path}"
def get_software():
    """Yield every software entry from the API, following pagination.

    Each page is fetched from ``{API_BASE_URL}/software`` and its ``data``
    items are yielded one by one. The ``links.next`` value of a page is
    used as the query string of the following request; iteration ends when
    that value is empty.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    query = ""
    while True:
        response = requests.get(f"{API_BASE_URL}/software?{query}")
        response.raise_for_status()
        payload = response.json()

        # Read the cursor before yielding so a malformed page fails early.
        next_link = payload["links"]["next"]

        yield from payload.get("data", [])

        if not next_link:
            return
        # Drop the leading '?' because the request URL already contains one.
        query = next_link[1:]
def download_file(url, filename, timeout=30):
    """Download ``url`` to ``filename``, creating parent directories.

    The body is streamed to a temporary ``<filename>.part`` file and renamed
    into place only after the download completes, so a failed or interrupted
    transfer never leaves a truncated file at ``filename``.

    Args:
        url: Address of the resource to fetch.
        filename: Destination path on disk.
        timeout: Seconds to wait for the server to connect or send data
            before giving up; passed to ``requests.get``.

    Network and HTTP errors are reported on stderr and otherwise ignored,
    so that one bad asset does not stop the caller.
    """
    directory = os.path.dirname(filename)
    # os.makedirs("") raises, so skip it for a bare file name.
    if directory:
        os.makedirs(directory, exist_ok=True)

    partial = f"{filename}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial, "wb") as file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        file.write(chunk)
        os.replace(partial, filename)
    except requests.RequestException as e:
        print(f"Error downloading {url}", e, file=sys.stderr)
    finally:
        # After a successful rename the partial file is already gone; after
        # any failure this removes whatever was written so far.
        try:
            os.remove(partial)
        except FileNotFoundError:
            pass
def main():
    """Download the logo of every software entry returned by the API.

    For each entry the embedded publiccode.yml document is parsed, its
    ``logo`` value is resolved against the repository ``url``, and the file
    is saved as ``<first two hex digits>/<remaining digits><extension>``,
    where the digits are the SHA-1 of the resolved logo URL. Entries that
    cannot be parsed or lack the required fields are reported on stderr and
    skipped.
    """
    for software in get_software():
        try:
            publiccode = yaml.safe_load(software["publiccodeYml"])
        except (yaml.YAMLError, ValueError) as e:
            # Single quotes inside the replacement field: reusing the outer
            # quote character is a SyntaxError before Python 3.12.
            print(
                f"Error parsing YAML ({API_BASE_URL}/software/{software['id']}): {e}",
                file=sys.stderr,
            )
            continue

        # safe_load returns None for an empty document and may return a
        # scalar or a list; only a mapping can hold the keys read below.
        if not isinstance(publiccode, dict):
            continue

        logo = publiccode.get("logo")
        if not logo:
            continue

        repo = publiccode.get("url")
        if not isinstance(logo, str) or not isinstance(repo, str):
            print(
                f"Skipping logo, invalid 'logo' or 'url' value "
                f"({API_BASE_URL}/software/{software.get('id')})",
                file=sys.stderr,
            )
            continue

        logo = absolute_url(logo, repo)
        digest = hashlib.sha1(logo.encode("utf-8")).hexdigest()
        _, ext = os.path.splitext(logo)
        download_file(logo, f"{digest[:2]}/{digest[2:]}{ext}")


if __name__ == "__main__":
    main()