-
Notifications
You must be signed in to change notification settings - Fork 690
/
Copy pathwebscraping.py
34 lines (30 loc) · 1023 Bytes
/
webscraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import requests
from bs4 import BeautifulSoup
def cars_brand_links():
    """Crawl the carsprite price index and process each brand link found.

    Downloads the ``car-prices/`` index page, prefixes the ``href`` of every
    anchor on it with the site's ``/en/`` base URL, and passes each resulting
    URL that contains ``car-prices/`` to ``get_single_item_data``.

    Returns:
        None. All output is produced by ``get_single_item_data``.
    """
    base_url = 'https://www.carsprite.com/en/'
    url = 'https://www.carsprite.com/en/car-prices/'
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    for link in soup.find_all('a'):
        target = link.get('href')
        if target is None:
            # Anchors without an href attribute cannot be followed; the
            # concatenation below would raise TypeError on None.
            continue
        href = base_url + target
        if 'car-prices/' in href:
            get_single_item_data(href)
def get_single_item_data(brand_url):
    """Print the price-page URLs built from the links on one brand page.

    Downloads ``brand_url`` and inspects every anchor on the page. An
    ``href`` that contains none of ``/en/``, ``https`` or ``/car-prices/``
    is appended to the ``car-prices/`` base URL and the result is printed,
    one URL per line.

    Args:
        brand_url: URL of the page to download and scan for links.

    Returns:
        None. The URLs are written to standard output.
    """
    prefix = 'https://www.carsprite.com/en/car-prices/'
    # An href containing any of these markers is skipped.
    excluded_markers = ('/en/', 'https', '/car-prices/')
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(brand_url, timeout=30)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    for link in soup.find_all('a'):
        href = link.get('href')
        if href is None:
            # Anchors without an href attribute would make the substring
            # checks below raise TypeError.
            continue
        if any(marker in href for marker in excluded_markers):
            continue
        print(prefix + href)
# Script entry point: start the crawl from the price index page.
# NOTE(review): this call sits at module level, so it also runs on import.
cars_brand_links()