Skip to content

Commit 0d7e2f4

Browse files
committed
update .py
1 parent 7999c95 commit 0d7e2f4

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

automaticwebsiteurlscraper.py/webUrlscraper.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,32 @@
55
# webUrlscraper.py — prompt the user for a URL, fetch that page while
# skipping TLS certificate verification, and print every hyperlink
# target (href) found in its anchor tags.
import ssl

from urllib.request import urlopen, Request
from bs4 import BeautifulSoup

# Unverified TLS context: the scraper deliberately disables certificate
# validation so it can also fetch sites with self-signed/broken certs.
# NOTE(review): this is insecure by design here — do not copy elsewhere.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# getting in the website link
Url = input("Enter your Urllink")
try:
    # trying to access the page; a browser-like User-Agent avoids 403s
    # from servers that reject urllib's default agent string.
    page = Request(Url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urlopen(page, context=ctx).read()
    # Using beautifulsoup to read the contents of the page
    soup = BeautifulSoup(page, 'html.parser')
    # finding all the link headers (anchor tags); find_all always
    # returns a list, so no None-check is needed.
    links = soup.find_all('a')
    # BUG FIX: the old code re-parsed str(link) with split("href") and
    # hand-rolled index math (index2 + 3), which only worked when href
    # was the tag's first attribute and printed stray quote characters.
    # Ask BeautifulSoup for the attribute value directly instead, and
    # skip anchors that have no href at all.
    finalLinks = [link.get('href') for link in links if link.get('href')]
    print("Here are your final links")
    # printing the final completed list
    for i in finalLinks:
        print(i)
except Exception as e:
    print(str(e))

0 commit comments

Comments
 (0)