@@ -5,32 +5,32 @@
 from urllib.request import urlopen, Request
 from bs4 import BeautifulSoup
 
-ctx=ssl.create_default_context()
+ctx = ssl.create_default_context( )
 ctx.check_hostname = False
 ctx.verify_mode = ssl.CERT_NONE
 
 # getting in the website link
-Url=input("Enter your Urllink")
+Url = input("Enter your Urllink")
 try :
     # trying to access the page
-    page=Request(Url, headers={'User-Agent':'Mozilla/5.0'})
-    page=urlopen(page, context=ctx, ).read()
+    page = Request(Url, headers={'User-Agent':'Mozilla/5.0'})
+    page = urlopen(page, context=ctx).read()
     # Using beautifulsoup to read the contents of the page
-    soup=BeautifulSoup(page, 'html.parser')
+    soup = BeautifulSoup(page, 'html.parser')
     # finding all the link headers
-    links=soup.findAll('a')
+    links = soup.findAll('a')
     if(links is not None) :
-        finalLinks=[]
+        finalLinks = [ ]
         # getting actual site links from the header a
         for link in links :
-            if 'href' in str(link):
+            if 'href' in str(link) :
                 templist = str(link).split("href")
                 index1 = templist[-1].index("\"")
                 index2 = templist[-1][index1 + 1 :].index( "\"" )
                 finalLinks.append(templist[-1][index1 : index2 + 3])
         print("Here are your final links")
         # printing the final completed list
         for i in finalLinks :
             print(i)
 except Exception as e :
     print(str(e))
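The commit only adjusts spacing; the link extraction itself still works by splitting each tag's string form on "href" and slicing between quote indices, which keeps the surrounding quotes in the output and breaks on single-quoted or unquoted attributes. Below is a minimal alternative sketch under the same imports and the same unverified-TLS context as the script above; the helper name extract_links is hypothetical, not part of this repo. It reads the href attribute directly off each tag, and uses find_all (findAll is the legacy bs4 alias for the same method).

import ssl
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup


def extract_links(url):
    # Same unverified-TLS context as the script above; convenient for a
    # quick scrape, but it disables certificate checking entirely.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urlopen(req, context=ctx).read()
    soup = BeautifulSoup(page, 'html.parser')
    # href=True keeps only <a> tags that actually carry an href attribute,
    # and link['href'] returns the bare URL, so no string splitting or
    # quote-index arithmetic is needed.
    return [link['href'] for link in soup.find_all('a', href=True)]


if __name__ == '__main__':
    for href in extract_links(input("Enter your Urllink")):
        print(href)

Letting BeautifulSoup resolve the attribute also handles relative URLs and empty href values uniformly, since whatever the parser stored for the attribute is returned as-is.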