Skip to content

Commit d3202ff

Browse files
committed
notebook updated
1 parent 65ef9d3 commit d3202ff

File tree

1 file changed

+40
-57
lines changed

1 file changed

+40
-57
lines changed

notebook.ipynb

+40-57
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": 1,
19+
"execution_count": null,
2020
"metadata": {},
2121
"outputs": [
2222
{
@@ -31,7 +31,7 @@
3131
}
3232
],
3333
"source": [
34-
"%pip install -qU deeplake openai"
34+
"!pip install deeplake openai"
3535
]
3636
},
3737
{
@@ -45,19 +45,21 @@
4545
"cell_type": "markdown",
4646
"metadata": {},
4747
"source": [
48-
"We use the `wget` command to download a JSON file containing scraped restaurant data directly from the specified GitHub repository. This file, `scraped_restaurant_data.json`, will be saved to the current working directory, making it readily available for data processing and analysis tasks in the notebook. If you want to perform the scraping yourself, you can use libraries like **[Scrape Graph AI](https://github.com/ScrapeGraphAI/Scrapegraph-ai)** to gather data directly from websites."
48+
"We use the `requests` command to download a JSON file containing scraped restaurant data directly from the specified GitHub repository. This file, `scraped_restaurant_data.json`, will be saved to the current working directory, making it readily available for data processing and analysis tasks in the notebook. If you want to perform the scraping yourself, you can use libraries like **[Scrape Graph AI](https://github.com/ScrapeGraphAI/Scrapegraph-ai)** to gather data directly from websites."
4949
]
5050
},
5151
{
52-
"cell_type": "markdown",
52+
"cell_type": "code",
53+
"execution_count": null,
5354
"metadata": {},
55+
"outputs": [],
5456
"source": [
55-
"%pip install -q requests"
57+
"!pip install requests"
5658
]
5759
},
5860
{
5961
"cell_type": "code",
60-
"execution_count": 158,
62+
"execution_count": 13,
6163
"metadata": {},
6264
"outputs": [],
6365
"source": [
@@ -66,27 +68,12 @@
6668
"url = \"https://raw.githubusercontent.com/activeloopai/notebook-v4/refs/heads/main/scraped_restaurant_data.json\"\n",
6769
"file_name = \"scraped_restaurant_data.json\"\n",
6870
"\n",
69-
"response = requests.get(url)\n",
70-
"response.raise_for_status() # Check if the request was successful\n",
71-
"\n",
72-
"with open(file_name, \"wb\") as file:\n",
73-
" file.write(response.content)"
74-
]
75-
},
76-
{
77-
"cell_type": "code",
78-
"execution_count": 159,
79-
"metadata": {},
80-
"outputs": [],
81-
"source": [
82-
"import json\n",
83-
"with open(\"scraped_restaurant_data.json\", \"r\") as f:\n",
84-
" scraped_data = json.load(f)"
71+
"scraped_data = requests.get(url).json()"
8572
]
8673
},
8774
{
8875
"cell_type": "code",
89-
"execution_count": 160,
76+
"execution_count": 3,
9077
"metadata": {},
9178
"outputs": [
9279
{
@@ -158,12 +145,12 @@
158145
},
159146
{
160147
"cell_type": "code",
161-
"execution_count": 74,
148+
"execution_count": null,
162149
"metadata": {},
163150
"outputs": [],
164151
"source": [
165-
"ds.add_column(\"restaurant_name\", types.Text(index_type=types.TextIndexType.Inverted))\n",
166-
"ds.add_column(\"restaurant_description\", types.Text(index_type=types.TextIndexType.Inverted))"
152+
"ds.add_column(\"restaurant_name\", types.Text(index_type=types.Inverted))\n",
153+
"ds.add_column(\"restaurant_description\", types.Text(index_type=types.Inverted))"
167154
]
168155
},
169156
{
@@ -182,7 +169,7 @@
182169
},
183170
{
184171
"cell_type": "code",
185-
"execution_count": 76,
172+
"execution_count": 14,
186173
"metadata": {},
187174
"outputs": [],
188175
"source": [
@@ -209,7 +196,7 @@
209196
},
210197
{
211198
"cell_type": "code",
212-
"execution_count": 77,
199+
"execution_count": 15,
213200
"metadata": {},
214201
"outputs": [],
215202
"source": [
@@ -258,7 +245,7 @@
258245
},
259246
{
260247
"cell_type": "code",
261-
"execution_count": 79,
248+
"execution_count": 16,
262249
"metadata": {},
263250
"outputs": [
264251
{
@@ -267,19 +254,17 @@
267254
"Dataset(columns=(restaurant_name,restaurant_description), length=4)"
268255
]
269256
},
270-
"execution_count": 79,
257+
"execution_count": 16,
271258
"metadata": {},
272259
"output_type": "execute_result"
273260
}
274261
],
275262
"source": [
276263
"word = 'burritos'\n",
277-
"view = ds.query(\n",
278-
" f\"\"\"\n",
279-
" SELECT *\n",
264+
"view = ds.query(f\"\"\"\n",
265+
" SELECT * \n",
280266
" WHERE CONTAINS(restaurant_description, '{word}')\n",
281-
" \"\"\"\n",
282-
")\n",
267+
"\"\"\")\n",
283268
"view"
284269
]
285270
},
@@ -374,7 +359,7 @@
374359
},
375360
{
376361
"cell_type": "code",
377-
"execution_count": 82,
362+
"execution_count": null,
378363
"metadata": {},
379364
"outputs": [
380365
{
@@ -395,8 +380,8 @@
395380
],
396381
"source": [
397382
"# Add columns to the dataset\n",
398-
"ds_bm25.add_column(\"restaurant_name\", types.Text(index_type=types.TextIndexType.BM25))\n",
399-
"ds_bm25.add_column(\"restaurant_description\", types.Text(index_type=types.TextIndexType.BM25))\n",
383+
"ds_bm25.add_column(\"restaurant_name\", types.Text(index_type=types.BM25))\n",
384+
"ds_bm25.add_column(\"restaurant_description\", types.Text(index_type=types.BM25))\n",
400385
"ds_bm25.commit()\n",
401386
"ds_bm25.summary()"
402387
]
@@ -463,7 +448,7 @@
463448
},
464449
{
465450
"cell_type": "code",
466-
"execution_count": 84,
451+
"execution_count": null,
467452
"metadata": {},
468453
"outputs": [
469454
{
@@ -479,13 +464,11 @@
479464
],
480465
"source": [
481466
"query = \"I want burritos\"\n",
482-
"view_bm25 = ds_bm25.query(\n",
483-
" f\"\"\"\n",
467+
"view_bm25 = ds_bm25.query(f\"\"\"\n",
484468
" SELECT * \n",
485469
" ORDER BY BM25_SIMILARITY(restaurant_description, '{query}') DESC \n",
486-
" LIMIT 10\n",
487-
" \"\"\"\n",
488-
")\n",
470+
" LIMIT 10 \n",
471+
"\"\"\")\n",
489472
"view_bm25"
490473
]
491474
},
@@ -602,8 +585,8 @@
602585
"\n",
603586
"# Add columns to the dataset\n",
604587
"vector_search.add_column(name=\"embedding\", dtype=types.Embedding(3072))\n",
605-
"vector_search.add_column(name=\"restaurant_name\", dtype=types.Text(index_type=types.TextIndexType.BM25))\n",
606-
"vector_search.add_column(name=\"restaurant_description\", dtype=types.Text(index_type=types.TextIndexType.BM25))\n",
588+
"vector_search.add_column(name=\"restaurant_name\", dtype=types.Text(index_type=types.BM25))\n",
589+
"vector_search.add_column(name=\"restaurant_description\", dtype=types.Text(index_type=types.BM25))\n",
607590
"vector_search.commit()"
608591
]
609592
},
@@ -1063,7 +1046,7 @@
10631046
}
10641047
],
10651048
"source": [
1066-
"%pip install -qU numpy pydantic"
1049+
"!pip install numpy pydantic"
10671050
]
10681051
},
10691052
{
@@ -1500,7 +1483,7 @@
15001483
},
15011484
{
15021485
"cell_type": "code",
1503-
"execution_count": 1,
1486+
"execution_count": null,
15041487
"metadata": {},
15051488
"outputs": [
15061489
{
@@ -1519,8 +1502,8 @@
15191502
}
15201503
],
15211504
"source": [
1522-
"%pip install -qU torch torchvision\n",
1523-
"%pip install -q git+https://github.com/openai/CLIP.git"
1505+
"!pip install -U torch torchvision\n",
1506+
"!pip install git+https://github.com/openai/CLIP.git"
15241507
]
15251508
},
15261509
{
@@ -1823,7 +1806,7 @@
18231806
},
18241807
{
18251808
"cell_type": "code",
1826-
"execution_count": 2,
1809+
"execution_count": null,
18271810
"metadata": {},
18281811
"outputs": [
18291812
{
@@ -1838,7 +1821,7 @@
18381821
}
18391822
],
18401823
"source": [
1841-
"%pip install -q matplotlib"
1824+
"!pip install matplotlib"
18421825
]
18431826
},
18441827
{
@@ -1931,7 +1914,7 @@
19311914
},
19321915
{
19331916
"cell_type": "code",
1934-
"execution_count": 3,
1917+
"execution_count": null,
19351918
"metadata": {},
19361919
"outputs": [
19371920
{
@@ -1946,14 +1929,14 @@
19461929
}
19471930
],
19481931
"source": [
1949-
"%pip install -q colpali-engine accelerate"
1932+
"!pip install colpali-engine accelerate"
19501933
]
19511934
},
19521935
{
19531936
"cell_type": "markdown",
19541937
"metadata": {},
19551938
"source": [
1956-
"### Download the ColPali model from Hugging Face"
1939+
"### Download the ColPali model"
19571940
]
19581941
},
19591942
{
@@ -2023,7 +2006,7 @@
20232006
},
20242007
{
20252008
"cell_type": "code",
2026-
"execution_count": 4,
2009+
"execution_count": null,
20272010
"metadata": {},
20282011
"outputs": [
20292012
{
@@ -2038,7 +2021,7 @@
20382021
}
20392022
],
20402023
"source": [
2041-
"%pip install -q datasets"
2024+
"!pip install datasets"
20422025
]
20432026
},
20442027
{

0 commit comments

Comments
 (0)