|
| 1 | +# /// script |
| 2 | +# requires-python = ">=3.10" |
| 3 | +# /// |
| 4 | +from __future__ import annotations |
| 5 | + |
| 6 | +import datetime as dt |
| 7 | +import json |
| 8 | +import urllib.parse |
| 9 | +import urllib.request |
| 10 | +from pathlib import Path |
| 11 | + |
| 12 | + |
def get_clickhouse_data(*, timeout: float = 60.0) -> str:
    """Fetch last month's per-project PyPI download totals from ClickHouse.

    POSTs a SQL query to the public ClickHouse HTTP endpoint (as user
    ``demo``, requesting ``JSON`` as the default format). The query sums
    ``count`` per project in ``pypi.pypi_downloads_per_month`` for the
    calendar month preceding the current one, ordered by download count
    descending and limited to 15000 rows.

    Args:
        timeout: Seconds to wait on blocking network operations before
            giving up. Without a timeout a stalled connection would hang
            the script indefinitely.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request fails at the protocol level.
        TimeoutError: If the server does not respond within ``timeout``.
    """
    params = {"user": "demo", "default_format": "JSON"}

    # Use UTC so the selected month does not depend on the machine's local
    # timezone (the output timestamp written elsewhere in this file is UTC).
    today = dt.datetime.now(dt.timezone.utc).date()
    # Step back one day from the first of this month to land in the previous
    # month, then snap to that month's first day.
    last_day_of_previous_month = today.replace(day=1) - dt.timedelta(days=1)
    last_month = last_day_of_previous_month.replace(day=1).strftime("%Y-%m-%d")
    print(f"{last_month=}")

    # last_month is generated above by strftime, never taken from user input,
    # so interpolating it into the SQL text is safe.
    query = f"""
    SELECT SUM(count) AS download_count, project
    FROM pypi.pypi_downloads_per_month
    WHERE month = '{last_month}'
    GROUP BY project
    ORDER BY download_count DESC
    LIMIT 15000"""

    url = "https://sql-clickhouse.clickhouse.com?" + urllib.parse.urlencode(params)
    req = urllib.request.Request(url, data=query.encode("utf-8"), method="POST")
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.read().decode("utf-8")
| 34 | + |
| 35 | + |
def reformat_clickhouse_json(input_data: dict) -> None:
    """Reshape a parsed ClickHouse JSON response and save it to disk.

    The output starts with a ``last_update`` UTC timestamp and a ``source``
    marker, followed by every key of ``input_data`` in its original order,
    except that ``rows`` is stored as ``total_rows`` and ``data`` is stored
    as ``rows`` (each entry reduced to an integer ``download_count`` and
    its ``project``). The result is written to ``top-pypi-packages.json``
    in the current working directory.

    Args:
        input_data: Parsed response; must contain a ``data`` list whose
            entries have ``download_count`` and ``project`` keys.

    Raises:
        KeyError: If ``data`` or one of the per-entry keys is missing.
    """
    # Built up front so a malformed payload fails before anything is written.
    packages = [
        {"download_count": int(entry["download_count"]), "project": entry["project"]}
        for entry in input_data["data"]
    ]

    timestamp = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    output = {"last_update": timestamp, "source": "ClickHouse"}

    # Input key -> output key; keys not listed here keep their own name.
    key_renames = {"rows": "total_rows", "data": "rows"}
    for key, value in input_data.items():
        output[key_renames.get(key, key)] = packages if key == "data" else value

    serialized = json.dumps(output, indent=0)
    Path("top-pypi-packages.json").write_text(serialized + "\n")
    print("Saved to top-pypi-packages.json")
| 59 | + |
| 60 | + |
def main() -> None:
    """Fetch the ClickHouse response, parse it as JSON, and write the output file."""
    raw_response = get_clickhouse_data()
    parsed_response = json.loads(raw_response)
    reformat_clickhouse_json(parsed_response)
| 65 | + |
| 66 | + |
# Run the fetch-and-reformat pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments