|
#!/usr/bin/env bash

# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

#
# This script fetches the list of top PyPI packages and saves them to a file.
# It downloads the data from https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.json,
# extracts the top 5000 package names using jq, and saves them to the specified location.
#
# If the destination file already exists, the script will do nothing.
#
# The list is first written to a temporary file next to the destination and is
# only renamed into place after a successful, non-empty download. A failed run
# therefore never leaves an empty or partial destination file behind (which
# would make every later run believe there is nothing to do).
#
# Usage: ./find_packages.sh [FOLDER] [FILE]
#   - FOLDER: The destination folder (default: ../src/macaron/resources,
#             interpreted relative to the current working directory)
#   - FILE: The destination filename (default: popular_packages.txt)
#
# Exit status: 0 if the file already exists or was saved, 1 on any failure.
#
# Dependencies: curl, jq.

# pipefail is essential here: without it a failing curl is masked by jq, which
# exits 0 when it is given empty input.
set -euo pipefail

# Default values.
readonly DEFAULT_FOLDER="../src/macaron/resources"
readonly DEFAULT_FILE="popular_packages.txt"
readonly URL="https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.json"
readonly MAX_PACKAGES=5000

# Override with provided arguments if they exist.
FOLDER=${1:-$DEFAULT_FOLDER}
FILE=${2:-$DEFAULT_FILE}
FULL_PATH="$FOLDER/$FILE"

# Path of the in-progress download; empty while there is nothing to clean up.
TMP_FILE=""

# Print a diagnostic to stderr and abort with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the temporary download, if any, on every exit path.
cleanup() {
  if [[ -n "$TMP_FILE" ]]; then
    rm -f -- "$TMP_FILE"
  fi
}
trap cleanup EXIT

if [[ -f "$FULL_PATH" ]]; then
  echo "$FULL_PATH already exists. Nothing to do."
  exit 0
fi

echo "$FULL_PATH not found. Fetching top PyPI packages..."

for tool in curl jq; do
  command -v "$tool" > /dev/null 2>&1 || die "Required dependency not found: $tool"
done

# Ensure the directory exists.
mkdir -p -- "$FOLDER" || die "Failed to create directory $FOLDER"

# Create the temporary file in the destination directory so the final mv is a
# rename on the same filesystem rather than a copy.
TMP_FILE=$(mktemp -- "${FULL_PATH}.XXXXXX") \
  || die "Failed to create a temporary file in $FOLDER"

# Fetch and process JSON using curl and jq.
#   curl -f  : treat HTTP errors (4xx/5xx) as failures instead of saving the error page
#   curl -sS : no progress meter, but still report errors on stderr
#   curl -L  : follow redirects
# Sort by download count *before* slicing so the result is the real top
# MAX_PACKAGES even if the upstream rows are not already ordered.
if ! curl -fsSL "$URL" \
  | jq -r --argjson limit "$MAX_PACKAGES" \
    '.rows | sort_by(-.download_count) | .[:$limit] | .[].project' > "$TMP_FILE"; then
  die "Failed to fetch or process package data."
fi

# A syntactically valid response without any rows is still a failure.
[[ -s "$TMP_FILE" ]] || die "Failed to fetch or process package data: no packages found."

# mktemp creates the file with mode 0600; give it the permissions a plain
# redirection would have produced under the current umask.
chmod "$(printf '%o' "$((0666 & ~$(umask)))")" "$TMP_FILE" \
  || die "Failed to set permissions on $TMP_FILE"

mv -f -- "$TMP_FILE" "$FULL_PATH" || die "Failed to move package list to $FULL_PATH"
TMP_FILE=""

echo "Successfully saved top ${MAX_PACKAGES} packages to $FULL_PATH"
0 commit comments