Merge pull request #867 from Indrahas/Indrahas

pawangeek · web-flow · commit dd3ee47ee754 · 2022-10-03T08:11:26.000+05:30
Added automation script for downloading xkcd comics
diff --git a/xkcd_downloader/README.md b/xkcd_downloader/README.md
@@ -0,0 +1,12 @@
+## XKCD Comics downloader
+
+A Python script that downloads XKCD comics, starting from a given comic page and following the "Prev" links back through the earlier comics.
+
+To run the script, open a bash terminal in the script's directory:
+
+1. Run the script, then enter the URL of the starting comic page when prompted
+```bash
+python3 download_xkcd.py
+```
+
+2. The comics will be saved in an `xkcd` folder inside the directory the script is run from.
diff --git a/xkcd_downloader/download_xkcd.py b/xkcd_downloader/download_xkcd.py
@@ -0,0 +1,33 @@
+#! python3
+
+import requests
+import os
+import bs4
+
+url = input("The url of the XKCD comics")              # starting url
+os.makedirs('xkcd', exist_ok=True)    # store comics in ./xkcd
+while not url.endswith('#'):
+    print('Downloading page  %s...' % url)
+    res = requests.get(url)
+    res.raise_for_status()
+
+    soup = bs4.BeautifulSoup(res.text, 'html.parser')
+    comicElem = soup.select('#comic img')
+    if comicElem == []:
+        print('Could not find comic image.')
+    else:
+        comicUrl = 'https:' + comicElem[0].get('src')
+        # Download the image.
+        print('Downloading image %s...' % (comicUrl))
+        res = requests.get(comicUrl)
+        res.raise_for_status()
+        imageFile = open(os.path.join('xkcd', os.path.basename(comicUrl)), 'wb')
+        for chunk in res.iter_content(100000):
+            imageFile.write(chunk)
+        imageFile.close()
+
+    # Get the Prev button's url.
+    prevLink = soup.select('a[rel="prev"]')[0]
+    url = 'https://xkcd.com' + prevLink.get('href')
+
+print('Done.')