Skip to content

Commit adb8056

Browse files
committed
add scripts and problemset
1 parent e632b8e commit adb8056

File tree

1,653 files changed

+379412
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,653 files changed

+379412
-0
lines changed

1.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# coding:utf-8
2+
import re
3+
import json
4+
import os
5+
import threading
6+
import time
7+
import requests
8+
from requests.exceptions import RequestException
9+
from bs4 import BeautifulSoup
10+
11+
import random
12+
13+
def get_proble_set(url, timeout=30):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the status code is 200; ``None`` for any
        other status code or when the request raises ``RequestException``
        (which includes timeouts).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
21+
22+
def parse_proble_set(problemSet, start=930, delay=1):
    """Start one download thread for every problem that is not on disk yet.

    A problem is considered already handled when either "<slug>.json" or
    "[no content]<slug>.json" exists in the current working directory.

    Args:
        problemSet: Sequence of dicts; each entry must provide
            ``entry["stat"]["question__title_slug"]``.
        start: Index of the first entry to process. Defaults to 930, the
            offset that was previously hard-coded here; pass 0 to walk
            the whole list.
        delay: Seconds to sleep before starting each download thread
            (presumably to avoid hammering the server).

    Returns:
        None. Threads are started but not joined, so "is done." only means
        the download for that index has been launched.
    """
    for i, problem in enumerate(problemSet[start:], start):
        title = problem["stat"]["question__title_slug"]
        already_saved = (
            os.path.exists("[no content]{}.json".format(title))
            or os.path.exists("{}.json".format(title))
        )
        if already_saved:
            print(i, "has been parsed.")
            continue

        time.sleep(delay)
        t = threading.Thread(target=construct_url, args=(title,))
        t.start()

        print(i, "is done.")
40+
41+
def construct_url(problemTitle):
    """Build the description-page URL for a problem slug and download it.

    The URL is handed to ``get_proble_content`` together with the slug;
    nothing is returned.
    """
    description_url = "".join(
        ("https://leetcode.com/problems/", problemTitle, "/description/")
    )
    get_proble_content(description_url, problemTitle)
45+
46+
def save_problem(title,content):
    """Write ``content`` to the file "<title>.html", encoded as UTF-8.

    An existing file with the same name is overwritten.
    """
    html_path = title + ".html"
    with open(html_path, mode='w+', encoding="utf-8") as html_file:
        html_file.write(content)
51+
52+
def get_proble_content(problemUrl, title, timeout=30):
    """Download one problem through the LeetCode GraphQL endpoint and save it.

    The problem page is requested first so that the ``csrftoken`` value can
    be read from its ``Set-Cookie`` header; that token is then sent back as
    both a cookie and the ``x-csrftoken`` header of the GraphQL POST.

    Files written to the current working directory:
      * "<title>.json" and "<title>.html" when the question has content;
      * "[no content]<title>.json" when the question's content is null.

    Args:
        problemUrl: Description-page URL of the problem (also sent as the
            ``referer`` header).
        title: Problem slug, used as the GraphQL ``titleSlug`` variable and
            as the base name of the output files.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. This runs as a thread target, so every failure is reported
        with ``print`` instead of being raised.
    """
    try:
        # Both requests live inside the try block: an exception escaping a
        # worker thread would otherwise only surface as a stray traceback.
        response = requests.get(problemUrl, timeout=timeout)
        setCookie = response.headers.get("Set-Cookie", "")
        csrftoken = re.search(r"csrftoken=(.*?);", setCookie, re.S)
        if csrftoken is None:
            print("[error] ", "csrftoken not found in Set-Cookie header", problemUrl)
            return
        token = csrftoken.group(1)

        url = "https://leetcode.com/graphql"
        data = {
            "operationName":"questionData",
            "variables":{"titleSlug":title},
            "query": "query questionData($titleSlug: String!) {\n question(titleSlug: $titleSlug) {\n questionId\n questionFrontendId\n boundTopicId\n title\n titleSlug\n content\n translatedTitle\n translatedContent\n isPaidOnly\n difficulty\n likes\n dislikes\n isLiked\n similarQuestions\n exampleTestcases\n categoryTitle\n contributors {\n username\n profileUrl\n avatarUrl\n __typename\n }\n topicTags {\n name\n slug\n translatedName\n __typename\n }\n companyTagStats\n codeSnippets {\n lang\n langSlug\n code\n __typename\n }\n stats\n hints\n solution {\n id\n canSeeDetail\n paidOnly\n hasVideoSolution\n paidOnlyVideo\n __typename\n }\n status\n sampleTestCase\n metaData\n judgerAvailable\n judgeType\n mysqlSchemas\n enableRunCode\n enableTestMode\n enableDebugger\n envInfo\n libraryUrl\n adminUrl\n challengeQuestion {\n id\n date\n incompleteChallengeCount\n streakCount\n type\n __typename\n }\n __typename\n }\n}\n"
        }
        headers = {
            'x-csrftoken': token,
            'referer':problemUrl,
            'content-type':'application/json',
            'origin':'https://leetcode.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
        }
        # NOTE(review): apart from csrftoken these cookie values are
        # hard-coded, and the ' _gat' key carries a leading space; left
        # untouched here -- confirm whether they are needed at all.
        cookies = {
            '__cfduid':'d9ce37537c705e759f6bea15fffc9c58b1525271602',
            '_ga':'GA1.2.5783653.1525271604',
            '_gid':'GA1.2.344320119.1533189808',
            'csrftoken':token,
            ' _gat':'1'
        }

        # The request body is sent as a JSON document.
        dumpJsonData = json.dumps(data)
        response = requests.post(
            url,
            data=dumpJsonData,
            headers=headers,
            cookies=cookies,
            timeout=timeout,
        )
        dictInfo = json.loads(response.text)

        question = (dictInfo.get("data") or {}).get("question")
        if question is None:
            # Nothing is saved, so a later run attempts this problem again.
            print("[error] ", "no question data in response", problemUrl)
            return

        content = question.get("content")
        if content is not None:
            saveJSON(dictInfo, title + ".json")
            save_problem(title, content)
        else:
            saveJSON(dictInfo, "[no content]" + title + ".json")
    except Exception as e:
        # Deliberately broad: best-effort download at a thread boundary.
        print("[error] ", e, problemUrl)
101+
102+
def saveJSON(data, filename):
    """Serialize ``data`` as JSON into the file ``filename``.

    The file is written as UTF-8 with a four-space indent, and non-ASCII
    characters are stored as-is rather than as ``\\uXXXX`` escapes.
    """
    with open(filename, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)
105+
106+
def main():
    """Load the saved English problem list and download its problems.

    Reads "[en]json2-problemset.json" from the current working directory
    and passes the parsed data to ``parse_proble_set``.

    The file is not created by this function. It was originally produced
    by fetching https://leetcode.com/api/problems/all/ with
    ``get_proble_set``, taking the "stat_status_pairs" entry of the decoded
    JSON and storing it with ``saveJSON`` (the full response was kept as
    "[en]json1-origin-data.json"). The same steps against
    https://leetcode-cn.com/api/problems/all/ gave the "[cn]..." files.

    Raises:
        FileNotFoundError: If the problem-set file is missing.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open("[en]json2-problemset.json", 'r', encoding='utf-8') as f:
        problemset = json.load(f)
    parse_proble_set(problemset)
122+
123+
124+
if __name__ == '__main__':
    # Every output file is written relative to the working directory, so
    # switch into the output folder first, creating it on the first run.
    output_dir = "算法题"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    os.chdir(output_dir)
    main()

0 commit comments

Comments
 (0)