Skip to content

Commit 56c2bf1

Browse files
Add files via upload
1 parent 16b9337 commit 56c2bf1

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# The program will use urllib to read the HTML from the data files below, extract
2+
# the href= values from the anchor tags, scan for a tag that is in a particular
3+
# position relative to the first name in the list, follow that link and repeat
4+
# the process a number of times and report the last name you find.
5+
6+
import urllib.request,urllib.parse,urllib.error
7+
from bs4 import BeautifulSoup
8+
import ssl
9+
import re
10+
11+
# Ignore SSL certificate errors.
# NOTE: hostname checking and certificate verification are switched off here,
# which is only reasonable for fetching public practice data; do not reuse
# this context for anything sensitive.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Values used when the URL prompt is left empty (allows quick testing).
DEFAULT_URL = 'http://py4e-data.dr-chuck.net/known_by_Fikret.html'
DEFAULT_COUNT = 4
DEFAULT_POSITION = 3

# Raw string with an escaped dot so that only a literal ".html" matches.
NAME_PATTERN = re.compile(r'by_(.*)\.html')


def fetch_links(url):
    """Return the href value of every anchor tag on the page at *url*.

    Anchors without an href attribute are kept as ``None`` so that the
    position of each link in the page is preserved.
    """
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url, context=ctx) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    return [tag.get('href', None) for tag in soup('a')]


def select_link(links, position):
    """Return the link at the 1-based *position* in *links*.

    Raises:
        ValueError: if *position* is smaller than 1, or the anchor at that
            position has no href.
        IndexError: if the page has fewer than *position* links.
    """
    if position < 1:
        # Without this check, 0 or a negative value would silently index
        # from the end of the list.
        raise ValueError('Position must be 1 or greater, got {}'.format(position))
    if position > len(links):
        raise IndexError(
            'Position {} requested but only {} link(s) found'.format(
                position, len(links)))
    link = links[position - 1]
    if link is None:
        raise ValueError(
            'Anchor at position {} has no href attribute'.format(position))
    return link


def follow_links(url, count, position, fetch=fetch_links):
    """Follow the link at *position* *count* times, starting from *url*.

    Yields each URL that is followed, in order; the last one yielded is the
    final destination. *fetch* is the callable used to obtain the list of
    hrefs for a URL (defaults to ``fetch_links``).

    Raises:
        ValueError: if *count* is smaller than 1 (raised on first iteration).
    """
    if count < 1:
        raise ValueError('Count must be 1 or greater, got {}'.format(count))
    for _ in range(count):
        href = select_link(fetch(url), position)
        # urljoin leaves absolute hrefs untouched and resolves relative ones
        # against the page they were found on.
        url = urllib.parse.urljoin(url, href)
        yield url


def extract_name(url):
    """Return the name embedded in a ``..._by_<name>.html`` URL, or ``None``."""
    match = NAME_PATTERN.search(url)
    return match.group(1) if match else None


def main():
    """Prompt for URL, count and position, follow the links, report the name."""
    url = input('Enter URL: ').strip()
    if not url:
        # An empty URL selects the built-in test case.
        url = DEFAULT_URL
        count = DEFAULT_COUNT
        position = DEFAULT_POSITION
    else:
        # Convert once here instead of on every loop iteration.
        count = int(input('Enter Count: '))
        position = int(input('Enter Position: '))

    for url in follow_links(url, count, position):
        print(url)

    name = extract_name(url)
    if name is None:
        print('No name found in URL: {}'.format(url))
    else:
        print("Name of the person: ", name)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)