
Commit 88c12d1
committed Oct 3, 2021
Added 500 Data science books scraper
1 parent ac7faad

File tree

4 files changed: +608 −0 lines changed

datascience_books_scraper/500Datasciencebooks.csv

Lines changed: 500 additions & 0 deletions
Large diffs are not rendered by default.

datascience_books_scraper/README.md

Lines changed: 41 additions & 0 deletions

# 500 Data Science Books Scraper

This Python script scrapes popular Data Science books from https://1lib.in/s/data%20science

## Run Locally

Clone the project

```bash
git clone https://github.com/python-geeks/Automation-scripts.git
```

Go to the project directory

```bash
cd Automation-scripts/datascience_books_scraper
```

Install dependencies

```bash
pip install -r requirements.txt
```

Run the script

```bash
python scrape.py
```

Wait a few seconds, then check your directory for a file named:

```
500Datasciencebooks.csv
```

## Author

- [@ManthanShettigar](https://github.com/ManthanShettigar)
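Once the script finishes, the output can be sanity-checked with Python's standard `csv` module. A minimal sketch, using an inline sample in the same shape the script writes (the book rows here are illustrative, not real output; in practice you would `open('500Datasciencebooks.csv')` instead of the `StringIO`):

```python
import csv
import io

# Sample data with the same header scrape.py writes.
sample = """Sr.No,Book name,Publisher,Author,Year
1,Data Science from Scratch,O'Reilly,Joel Grus,2019
2,Python for Data Analysis,O'Reilly,Wes McKinney,2017
"""

# DictReader maps each row to a dict keyed by the header columns.
rows = list(csv.DictReader(io.StringIO(sample)))
print(len(rows))                 # → 2
print(rows[0]['Book name'])      # → Data Science from Scratch
```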
datascience_books_scraper/requirements.txt

Lines changed: 22 additions & 0 deletions

astroid==2.8.0
autopep8==1.5.7
beautifulsoup4==4.10.0
bs4==0.0.1
certifi==2021.5.30
charset-normalizer==2.0.6
flake8==3.9.2
idna==3.2
isort==5.9.3
lazy-object-proxy==1.6.0
lxml==4.6.3
mccabe==0.6.1
platformdirs==2.4.0
pycodestyle==2.7.0
pyflakes==2.3.1
pylint==2.11.1
requests==2.26.0
soupsieve==2.2.1
toml==0.10.2
typing-extensions==3.10.0.2
urllib3==1.26.7
wrapt==1.12.1

datascience_books_scraper/scrape.py

Lines changed: 45 additions & 0 deletions

import csv

import requests
from bs4 import BeautifulSoup

Book_name = []
Year = []
Publisher = []
Author = []

# Walk pages 1-10 of the search results.
for j in range(1, 11):
    source = requests.get(
        f'https://1lib.in/s/data%20science?page={j}').text
    soup = BeautifulSoup(source, 'lxml')
    books = soup.find_all('table', attrs={'style': 'width:100%;height:100%;'})
    for i in books:
        # book name
        try:
            Book_name.append(i.find('h3').text.strip())
        except Exception:
            Book_name.append('nan')
        # year
        try:
            Year.append(
                i.find('div', class_='property_year').text.strip()[6:10])
        except Exception:
            Year.append('nan')
        # publisher
        try:
            Publisher.append(
                i.find('div', attrs={'title': 'Publisher'}).text.strip())
        except Exception:
            Publisher.append('nan')
        # author
        try:
            Author.append(i.find('div', class_='authors').text.strip())
        except Exception:
            Author.append('nan')

file_name = '500Datasciencebooks.csv'

# newline='' prevents blank lines between rows on Windows
with open(file_name, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Sr.No', 'Book name', 'Publisher', 'Author', 'Year'])

    # start at index 0 so the first book is not skipped; Sr.No starts at 1
    for i in range(len(Book_name)):
        writer.writerow([i + 1, Book_name[i], Publisher[i], Author[i], Year[i]])
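The four parallel lists and repeated try/except blocks above can be condensed. A sketch of one possible reshaping (`safe_text` and `write_books` are hypothetical names, not part of this repository): collect one dict per book and write with `csv.DictWriter`.

```python
import csv


def safe_text(tag):
    """Return a tag's stripped text, or 'nan' when the tag was not found."""
    # Hypothetical helper replacing the repeated try/except pattern;
    # bs4's find() returns None on a miss, so a None check suffices.
    return tag.text.strip() if tag is not None else 'nan'


def write_books(books, file_name='500Datasciencebooks.csv'):
    """Write a list of per-book dicts to CSV with an auto-numbered Sr.No."""
    fields = ['Sr.No', 'Book name', 'Publisher', 'Author', 'Year']
    with open(file_name, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        for n, book in enumerate(books, start=1):
            writer.writerow({'Sr.No': n, **book})
```

Keeping each book's fields together in one dict avoids the risk of the parallel lists drifting out of sync if one lookup fails in an unexpected way.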
