@@ -20,15 +20,15 @@ This tool integrates ScrapeGraph with LlamaIndex, providing intelligent web scra
20
20
Install the package using pip:
21
21
22
22
``` bash
23
- pip install llama-index-tools-scrapegraph
23
+ pip install llama-index-tools-scrapegraphai
24
24
```
25
25
26
26
## Usage
27
27
28
28
First, import and initialize the ScrapegraphToolSpec:
29
29
30
30
``` python
31
- from llama_index.tools.scrapegraph import ScrapegraphToolSpec
31
+ from llama_index.tools.scrapegraph.base import ScrapegraphToolSpec
32
32
33
33
scrapegraph_tool = ScrapegraphToolSpec()
34
34
```
@@ -40,20 +40,27 @@ scrapegraph_tool = ScrapegraphToolSpec()
40
40
Extract structured data using a schema:
41
41
42
42
``` python
43
- from pydantic import BaseModel
44
-
45
- class ProductSchema (BaseModel ):
46
- name: str
47
- price: float
48
- description: str
49
-
50
- schema = [ProductSchema]
51
- result = scrapegraph_tool.scrapegraph_smartscraper(
52
- prompt = " Extract product information" ,
53
- url = " https://example.com/product" ,
54
- api_key = " your-api-key" ,
55
- schema = schema,
56
- )
43
+ from pydantic import BaseModel, Field
44
+
45
+ class FounderSchema (BaseModel ):
46
+     name: str = Field(description="Name of the founder")
47
+     role: str = Field(description="Role of the founder")
48
+     social_media: str = Field(description="Social media URL of the founder")
49
+
50
+ class ListFoundersSchema (BaseModel ):
51
+     founders: list[FounderSchema] = Field(description="List of founders")
52
+
53
+ response = scrapegraph_tool.scrapegraph_smartscraper(
54
+     prompt="Extract information about the founders",
55
+     url="https://scrapegraphai.com/",
56
+ api_key = " sgai-***" ,
57
+ schema = ListFoundersSchema,
58
+ )
59
+
60
+ result = response["result"]
61
+
62
+ for founder in result["founders"]:
63
+     print(founder)
57
64
```
58
65
59
66
### Smart Scraping (Async)
@@ -107,37 +114,6 @@ credits = scrapegraph_tool.scrapegraph_get_credits(api_key="your-api-key")
107
114
</Card >
108
115
</CardGroup >
109
116
110
- ## Example: Product Information Extraction
111
-
112
- ``` python
113
- from llama_index.tools.scrapegraph import ScrapegraphToolSpec
114
- from pydantic import BaseModel, Field
115
- from typing import List
116
-
117
- # Define your schema
118
- class ProductInfo (BaseModel ):
119
- name: str = Field(description = " Product name" )
120
- price: float = Field(description = " Product price" )
121
- features: List[str ] = Field(description = " Product features" )
122
- description: str = Field(description = " Product description" )
123
-
124
- # Initialize the tool
125
- tool = ScrapegraphToolSpec()
126
-
127
- # Extract product information
128
- result = tool.scrapegraph_smartscraper(
129
- prompt = " Extract detailed product information" ,
130
- url = " https://example.com/product" ,
131
- api_key = " your-api-key" ,
132
- schema = [ProductInfo]
133
- )
134
-
135
- # Process the results
136
- print (f " Product Name: { result.name} " )
137
- print (f " Price: $ { result.price} " )
138
- print (" Features:" , * result.features, sep = " \n - " )
139
- ```
140
-
141
117
## Support
142
118
143
119
Need help with the integration?
0 commit comments