LIMIT = 50
URL = f"https://www.indeed.com/jobs?q=Python&limit={LIMIT}"

-def extract_indeed_pages():
+def get_last_page():
    result = requests.get(URL)

    soup = BeautifulSoup(result.text, "html.parser")
@@ -17,11 +17,48 @@ def extract_indeed_pages():

    for link in links[:-1]:  # meaning: skip the last element
        pages.append(int(link.string))  # convert string -> integer
-
    max_page = pages[-1]
    return max_page

-def extract_indeed_jobs(last_page):
+def extract_job(html):
+    title = html.find("div", {"class": "title"}).find("a")["title"]
+    company = html.find("span", {"class": "company"})
+
+    if company:
+        company_anchor = company.find("a")
+        if company_anchor is not None:
+            company = str(company_anchor.string)
+        else:
+            company = str(company.string)
+        company = company.strip()
+    else:
+        company = None
+
+    location = html.find("div", {"class": "recJobLoc"})["data-rc-loc"]  # read the attribute off the div
+    job_id = html["data-jk"]
+
+    return {
+        "title": title,
+        "company": company,
+        "location": location,
+        "link": f"https://www.indeed.com/viewjob?jk={job_id}"
+    }
+
+def extract_jobs(last_page):
+    jobs = []
    for page in range(last_page):
-        res = requests.get(f"{URL}&start={page * LIMIT}")
-        print(res.status_code)
+        res = requests.get(f"{URL}&start={page * LIMIT}")  # the start offset advances by LIMIT per page
+        soup = BeautifulSoup(res.text, "html.parser")
+        results = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
+
+        # print(results)
+        for card in results:  # one card per job posting
+            job = extract_job(card)
+            jobs.append(job)
+    return jobs
+
+
+def get_jobs():
+    last_page = get_last_page()
+    jobs = extract_jobs(last_page)
+    return jobs
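
Taken together, get_last_page() reads the pagination links to find the highest page number, extract_jobs() requests each result page (Indeed paginates with a start offset, so page n begins n * LIMIT results in: 0, 50, 100, ...), and extract_job() pulls the title, company, location, and a viewjob link out of each jobsearch-SerpJobCard div. Below is a minimal driver sketch, not part of this commit: it assumes the file's unshown top-level imports (import requests, from bs4 import BeautifulSoup), and the save_to_file() helper and jobs.csv path are illustrative names only.

import csv

def save_to_file(jobs, path="jobs.csv"):
    # One CSV row per job dict produced by extract_job().
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["title", "company", "location", "link"])
        for job in jobs:
            writer.writerow([job["title"], job["company"], job["location"], job["link"]])

if __name__ == "__main__":
    jobs = get_jobs()
    save_to_file(jobs)

Note that the scraper keys on Indeed's CSS class names (title, company, recJobLoc, jobsearch-SerpJobCard), so it will stop matching if the site's markup changes.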