|
| 1 | +#!/usr/bin/python3 |
| 2 | +"""Parse the titles of all hot articles and print a sorted count of the given keywords.""" |
| 3 | + |
| 4 | +import requests |
| 5 | +import re |
| 6 | +from collections import defaultdict |
| 7 | + |
def count_words(subreddit, word_list, after=None, word_counts=None):
    """
    Recursively count keyword occurrences in the titles of a subreddit's hot articles.

    One page of the ``hot.json`` listing is fetched per call; while the
    response carries an ``after`` token the function calls itself to process
    the next page, accumulating into the same ``word_counts`` mapping.

    Args:
        subreddit (str): The subreddit to search.
        word_list (list): Keywords to search for. Matching is case-insensitive
            and whole-word only; keywords are treated as literal text, so
            entries such as ``c++`` are safe. A keyword listed more than once
            is counted once per listing.
        after (str, optional): Pagination token returned by the previous page.
        word_counts (defaultdict, optional): Accumulator shared across the
            recursive calls. Created on the first call when omitted.

    Returns:
        defaultdict: Maps each lower-cased keyword to its total number of
        occurrences. If a request fails or the payload is malformed, the
        counts gathered so far are returned (an empty mapping when the very
        first page fails).
    """
    if word_counts is None:
        word_counts = defaultdict(int)

    params = {"limit": 100}
    if after:
        params["after"] = after

    try:
        # Redirects are deliberately not followed (presumably so an unknown
        # subreddit that redirects elsewhere fails JSON parsing instead of
        # being counted - TODO confirm). The timeout keeps a stalled
        # connection from hanging the whole recursion.
        response = requests.get(
            f"https://www.reddit.com/r/{subreddit}/hot.json",
            headers={"User-Agent": "Mozilla/5.0"},
            params=params,
            allow_redirects=False,
            timeout=10,
        )
        response.raise_for_status()
        listing = response.json()["data"]
        children = listing["children"]
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # Network error, HTTP error status, non-JSON body or unexpected
        # payload shape: stop paginating and keep whatever was counted.
        return word_counts

    # Compile one pattern per keyword for this page. re.escape makes the
    # keyword literal; the lookarounds act like \b for ordinary words but
    # also work when the keyword starts or ends with a non-word character.
    patterns = [
        (word.lower(), re.compile(rf"(?<!\w){re.escape(word)}(?!\w)", re.IGNORECASE))
        for word in word_list
    ]

    for child in children:
        title = (child.get("data") or {}).get("title") or ""
        for key, pattern in patterns:
            # Adding even a zero count registers the keyword in the mapping.
            word_counts[key] += len(pattern.findall(title))

    next_after = listing.get("after")
    if next_after:
        return count_words(subreddit, word_list, next_after, word_counts)

    return word_counts
| 48 | + |
if __name__ == "__main__":
    import sys

    # Command-line entry point: argv[1] is the subreddit and argv[2] is one
    # whitespace-separated string of keywords.
    if len(sys.argv) >= 3:
        target = sys.argv[1]
        keywords = sys.argv[2].split()
        totals = count_words(target, keywords)

        # Keep only keywords that were found, then order them by descending
        # count, breaking ties alphabetically.
        hits = [(word, count) for word, count in totals.items() if count > 0]
        hits.sort(key=lambda pair: (-pair[1], pair[0].lower()))
        for word, count in hits:
            print(f"{word}: {count}")
    else:
        print("Usage: {} <subreddit> <list of keywords>".format(sys.argv[0]))
        print("Ex: {} programming 'python java javascript'".format(sys.argv[0]))
0 commit comments