|
| 1 | +import requests |
| 2 | +import csv |
| 3 | +import sys |
| 4 | +from bs4 import BeautifulSoup as bs |
| 5 | + |
| 6 | + |
def main():
    """Save the newest Hacker News stories to ``hackernews_latest.csv``.

    Prompts the user for how many stories to save (1-30), downloads the
    "newest" page, extracts each story's title and link, and writes them
    as CSV rows under the columns "News Title" and "Link to the News".
    If the page lists fewer stories than requested, only those are written.

    Exits with status 1, after printing a message, when the input is not
    an integer, the number is out of range, the page cannot be downloaded,
    or no story links are found in the downloaded page.
    """
    # Function-scope import: only needed here to resolve relative links.
    from urllib.parse import urljoin

    # Base url for the latest articles on the hackernews website
    baseurl = "https://news.ycombinator.com/newest"

    # Number of articles requested by the user
    try:
        number_of_articles = int(input(
            '''Enter the number of articles you want from the hackernews website.
(1-30) : '''))
    except ValueError:
        print("\nYou did not enter a number. Try again.\n")
        sys.exit(1)

    if not 1 <= number_of_articles <= 30:
        print("\nYour input was not in the given range!\n")
        sys.exit(1)

    # Fetch the page. A timeout keeps the script from hanging forever on a
    # stalled connection, and raise_for_status() stops us from parsing an
    # HTTP error page as if it were the story list.
    try:
        response = requests.get(baseurl, timeout=10)
        response.raise_for_status()
    except requests.RequestException as err:
        print("\nCould not fetch {}: {}\n".format(baseurl, err))
        sys.exit(1)

    # Soup object for easy scraping
    soup = bs(response.content, 'html.parser')

    # Finding all the a tags with the class storylink
    # NOTE(review): this selector depends on the site's current markup; if
    # the page no longer uses the `storylink` class nothing will match.
    latest = soup.find_all('a', attrs={'class': 'storylink'})

    if not latest:
        # Do not write a header-only CSV and let the caller report success.
        print("\nNo articles were found on the page.\n")
        sys.exit(1)

    keys = ["News Title", "Link to the News"]

    # One row per story, limited to the requested count. urljoin() leaves
    # absolute URLs alone and resolves relative hrefs against the page URL.
    result = [
        {
            "News Title": article.text,
            "Link to the News": urljoin(baseurl, article['href']),
        }
        for article in latest[:number_of_articles]
    ]

    # newline="" is required by the csv module (otherwise blank rows appear
    # on Windows); utf-8 keeps non-ASCII titles from failing on platforms
    # whose default encoding cannot represent them.
    with open("hackernews_latest.csv", "w", newline="",
              encoding="utf-8") as hackernews:
        writer = csv.DictWriter(hackernews, fieldnames=keys)
        writer.writeheader()
        writer.writerows(result)
| 61 | + |
| 62 | + |
# Script entry point: run the scraper only when this file is executed
# directly (not when it is imported), then print a confirmation message.
if __name__ == "__main__":
    main()
    print("\nYour news file has been successfully created!\n")
0 commit comments