3
3
from langchain import PromptTemplate
4
4
5
5
class ScrapingCodeGenerator :
6
+ MODEL_NAME = "text-davinci-003"
7
+ TEMPLATE = """
8
+ You are an expert website analyzer for a web scraping process.
9
+ Take the user requirements and convert it into clean python code to scrape the website.
10
+
11
+ USER REQUIREMENTS:
12
+ {requirements}
13
+
14
+ HTML CODE YOU NEED TO SCRAPE:
15
+ {html}
16
+
17
+ FINISH THE PYTHON CODE TO SCRAPE THE WEBSITE:
18
+
19
+ from bs4 import BeautifulSoup
20
+
21
+ # Get the URL of the website
22
+ with open('./results/denver.html') as f:
23
+ response = f.read()
24
+
25
+ html_soup = BeautifulSoup(response, 'html.parser')
26
+ """
27
+
6
28
static_code = """
7
29
from bs4 import BeautifulSoup
8
30
@@ -11,11 +33,20 @@ class ScrapingCodeGenerator:
11
33
response = f.read()
12
34
13
35
html_soup = BeautifulSoup(response, 'html.parser')
14
- """
15
- def __init__ (self , html_loader , llm , prompt_template ):
36
+ """
37
+
38
+ def __init__ (self , html_loader ):
16
39
self .html_loader = html_loader
17
- self .llm = llm
18
- self .prompt_template = prompt_template
40
+ self .llm = self .initialize_llm ()
41
+ self .prompt_template = self .initialize_template ()
42
+
43
+ def initialize_llm (self ):
44
+ load_dotenv ()
45
+ return OpenAI (model_name = self .MODEL_NAME , temperature = 0 )
46
+
47
+ def initialize_template (self ):
48
+ return PromptTemplate (input_variables = ["requirements" ,"html" ], template = self .TEMPLATE )
49
+
19
50
20
51
def generate_scraping_code (self , user_requirements ):
21
52
"""
@@ -26,11 +57,11 @@ def generate_scraping_code(self, user_requirements):
26
57
generated_code = self .llm (formatted_prompt )
27
58
28
59
full_scraping_code = f"""
29
- { self .static_code }
30
- { generated_code }
60
+ { self .static_code }
61
+ { generated_code }
31
62
"""
32
63
return full_scraping_code
33
-
64
+
34
65
class CodeWriter :
35
66
def __init__ (self , file_name ):
36
67
self .file_name = file_name
@@ -40,5 +71,4 @@ def write(self, scraping_code):
40
71
Writes the scraping code to a .py python file
41
72
"""
42
73
with open (self .file_name , 'w' ) as file :
43
- file .write (scraping_code )
44
-
74
+ file .write (scraping_code )
0 commit comments