|
| 1 | +# Scraped from justdial.com |
| 2 | +#Developed By-Saranya S kumar |
| 3 | +# Date - 29-06-2017 |
| 4 | +# import all the libraries that we are going to use. |
| 5 | +import urllib2 |
| 6 | +from bs4 import BeautifulSoup |
| 7 | +import csv |
| 8 | +from datetime import datetime |
| 9 | +#specify the url |
# Page to scrape and the CSV file that scraped rows are appended to.
LISTING_URL = "https://www.justdial.com/Kochi/Banquet-Halls/nct-10035861"
OUTPUT_CSV = "justdial.com_hotel.csv"


def node_text(node):
    """Return ``node.text``, or an empty string when *node* is None.

    The element lookups below return None when nothing matches; mapping
    that to "" keeps one incomplete listing from aborting the whole run
    with an AttributeError.
    """
    return "" if node is None else node.text


def fetch_soup(url):
    """Download *url* and return it parsed with BeautifulSoup's html.parser.

    Any exception raised by ``urllib2.urlopen`` propagates to the caller.
    """
    # The request is sent with an explicit User-Agent header.
    req = urllib2.Request(url, headers={'User-Agent': "Magic Browser"})
    page = urllib2.urlopen(req)
    try:
        return BeautifulSoup(page, 'html.parser')
    finally:
        # Release the HTTP connection even if parsing fails.
        page.close()


def listing_fields(result):
    """Return ``(name, address, phone)`` text for one listing block.

    A field whose element cannot be found is returned as "".
    """
    # NOTE(review): find_next() searches forward through the rest of the
    # document, not only inside `result`, so a listing that lacks a field
    # may pick up the value of a later listing. Switching to result.find()
    # would scope the search -- confirm the page structure first.
    name = result.find_next('h4', attrs={"class": "store-name"})
    address = result.find_next('span', attrs={"class": "mrehover"})
    # NOTE(review): the trailing space in "contact-info " is kept exactly
    # as it was; verify that it matches the intended <p> elements.
    phone = result.find_next('p', attrs={"class": "contact-info "})
    return node_text(name), node_text(address), node_text(phone)


def main():
    """Scrape the listing page, print each listing and append it to the CSV."""
    soup = fetch_soup(LISTING_URL)
    # Open the output once for the whole run instead of once per listing.
    with open(OUTPUT_CSV, "a") as csv_file:
        writer = csv.writer(csv_file)
        # Each listing lives in a <div class="store-details"> block.
        for result in soup.find_all('div', attrs={"class": "store-details"}):
            name, address, phone = listing_fields(result)
            # Same spacing as the previous comma-separated print statement.
            print(" ".join((name, " - " + address, " - ", phone)))
            # The last column records when the row was written.
            writer.writerow([name, address, phone, datetime.now()])


if __name__ == "__main__":
    main()
0 commit comments