Skip to content

Commit b2f919c

Browse files
committed
fixes: flake8 linting issues
1 parent a511288 commit b2f919c

File tree

1 file changed

+62
-51
lines changed

1 file changed

+62
-51
lines changed

codeforces_scraper/script.py

Lines changed: 62 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,34 @@
11
import requests
22
import json
3-
import bs4
3+
import bs4
44
import re
55

6-
# Non-greedy pattern matching a single HTML tag; compiled once at import time.
CLEANR = re.compile("<.*?>")


def cleanhtml(raw_html):
    """Return *raw_html* with every HTML tag removed, leaving only the text."""
    return CLEANR.sub("", raw_html)
1113

1214
def split_value_and_unit(soup):
    """Split a limit string such as ``"256 megabytes"`` into its parts.

    Returns a dict with an integer ``value`` and a string ``unit``.
    """
    fields = soup.split()
    return dict(value=int(fields[0]), unit=fields[1])
1817

1918

2019
def test_group(lst):
    """Pair up consecutive entries of *lst* into input/output sample dicts.

    Relies on the module-level ``pairwise`` helper for the grouping.
    """
    samples = []
    for _in, _out in pairwise(lst):
        samples.append({"input": _in, "output": _out})
    return samples
2221

2322

2423
def test_sample(souped_html):
    """Extract the sample tests from the ``<pre>`` blocks of a problem page."""
    pre_contents = get_tags_contents(souped_html, "pre")
    return test_group(pre_contents)
2625

2726

2827
def get_tags_contents(souped_html, tag_name, class_name=None):
    """Return the concatenated inner HTML of every matching tag.

    ``class_name`` is passed straight to BeautifulSoup's ``find_all`` as the
    class filter (``None`` means no class filtering).
    """
    contents = []
    for tag in souped_html.find_all(tag_name, class_name):
        contents.append(concat_contents(tag.contents))
    return contents
3032

3133

3234
def pairwise(iterable):
@@ -35,11 +37,11 @@ def pairwise(iterable):
3537

3638

3739
def get_statement(soup):
    """Return the HTML of the problem statement.

    The statement is the element immediately following the ``header`` div —
    assumes the standard Codeforces problem markup.
    """
    header = soup.find("div", "header")
    return concat_contents(header.next_sibling.contents)
3941

4042

41-
def get_content(soup, _class=''):
42-
element = soup.find('div', _class)
43+
def get_content(soup, _class=""):
44+
element = soup.find("div", _class)
4345
if not element:
4446
return None
4547
tags = element.contents
@@ -48,84 +50,93 @@ def get_content(soup, _class=''):
4850

4951

5052
def concat_contents(ls):
    """Join a sequence of soup nodes (or any objects) into one string."""
    return "".join(map(str, ls))
54+
5255

5356
def scrap_wraper(problem_link):
    """Download a Codeforces problem page and parse it into a dict.

    NOTE(review): assumes the standard problem-statement markup; any of the
    ``find`` calls may return ``None`` on an unexpected page — verify upstream.
    """
    markup = requests.get(problem_link).text
    soup = bs4.BeautifulSoup(markup, "html.parser")

    # The limit strings live in the second child of their respective divs.
    time_limit = soup.find("div", "time-limit").contents[1].string
    memory_limit = soup.find("div", "memory-limit").contents[1].string

    return {
        "title": soup.find("div", "title").string,
        "timeLimit": split_value_and_unit(time_limit),
        "memoryLimit": split_value_and_unit(memory_limit),
        "statement": get_statement(soup),
        "inputSpecification": get_content(soup, "input-specification"),
        "outputSpecification": get_content(soup, "output-specification"),
        "samples": test_sample(soup),
        "note": get_content(soup, "note"),
    }
6774

6875

69-
70-
def get_all_problems():
    """Download the whole Codeforces problemset and pretty-print it as JSON.

    Prints an error message instead of raising when the API call fails.
    """
    url = "https://codeforces.com/api/problemset.problems"
    print(url)

    response = requests.get(url)

    # Guard clause: bail out early on any non-OK status.
    if response.status_code != 200:
        print("SORRY! SERVER ERROR EXISTS")
        return

    payload = response.json()
    print(json.dumps(payload["result"]["problems"], sort_keys=True, indent=4))
81-
82-
def get_all_problems_by_tag(tag):
    """Download the Codeforces problemset and print every problem tagged *tag*.

    Prints an error message instead of raising when the API call fails.
    """
    url = "https://codeforces.com/api/problemset.problems"

    r = requests.get(url)

    if r.status_code == 200:
        data = r.json()
        for problem in data["result"]["problems"]:
            # `in` is the idiomatic membership test and short-circuits on the
            # first match, unlike list.count() which always scans the whole list.
            if tag in problem["tags"]:
                print(problem)
    else:
        print("SORRY! SERVER ERROR EXISTS")
98104

99-
def get_problem_statement_by_id_and_index(id, index):
    """Scrape one problem page and print its statement and I/O specs as text.

    ``id`` shadows the builtin but is kept unchanged so existing keyword
    callers keep working. Both arguments are expected to be strings.
    """
    url = "https://codeforces.com/problemset/problem/" + id + "/" + index
    data = scrap_wraper(url)

    # Strip the HTML markup before printing each section.
    for section in ("statement", "inputSpecification", "outputSpecification"):
        print(cleanhtml(data[section]))
112+
105113

106114
def main():
    """Interactive console menu driving the scraper functions.

    Loops until the user answers anything other than "YES" to the continue
    prompt. A non-integer menu choice raises ValueError (unchanged from the
    original behaviour).
    """
    while True:
        print("PLEASE SELECT ANY ONE OF THE BELOW :")
        print("\n1. GET ALL PROBLEMS")
        print("\n2. GET ALL PROBLEMS BY TAGS \n3. GET PROBLEM STATEMENT ")

        choice = int(input())

        if choice == 1:
            get_all_problems()
        elif choice == 2:
            print("\nPlease Enter Your Tag : ")
            get_all_problems_by_tag(input())
        elif choice == 3:
            print("\nPlease Enter Id and Index as Follows : \nId : ")
            problem_id = input()
            print("\nIndex : ")
            problem_index = input()
            get_problem_statement_by_id_and_index(problem_id, problem_index)

        if input("WOULD YOU LIKE TO CONTINUE : ") != "YES":
            break
129-
139+
140+
130141
# Run the interactive menu only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)