Commit 6f3d4b5

Merge LLM + LLMPredictor, reorganize types (run-llama#9388)
1 parent aa67267 commit 6f3d4b5

File tree: 249 files changed, +2062 -1376 lines changed

CHANGELOG.md (+5)

@@ -12,6 +12,11 @@
 - Change more than one image input for Replicate Multi-modal models from error to warning (#9360)
 - Removed GPT-Licensed `aiostream` dependency (#9403)

+### Breaking Changes
+
+- Updated the base `LLM` interface to match `LLMPredictor` (#9388)
+- Deprecated `LLMPredictor` (#9388)
+
 ## [0.9.13] - 2023-12-06

 ### New Features
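The changelog names the breaking change without showing its effect on calling code. As a rough migration sketch (assuming the post-merge API; the model name, prompt text, and `PromptTemplate` import path are illustrative):

```py
# Hedged before/after sketch for the LLM + LLMPredictor merge (#9388).
from llama_index.llms import OpenAI
from llama_index.prompts import PromptTemplate

llm = OpenAI(model="gpt-3.5-turbo")  # illustrative model choice
prompt = PromptTemplate("Summarize {topic} in one sentence.")

# Before this commit, prediction went through the wrapper:
#   LLMPredictor(llm=llm).predict(prompt, topic="query engines")
# After it, the base LLM exposes the predictor-style method directly:
response = llm.predict(prompt, topic="query engines")
print(response)
```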

benchmarks/agent/agent_utils.py (+1 -1)

@@ -3,8 +3,8 @@
 from llama_index.agent import OpenAIAgent, ReActAgent
 from llama_index.agent.types import BaseAgent
 from llama_index.llms import Anthropic, OpenAI
-from llama_index.llms.base import LLM
 from llama_index.llms.llama_utils import messages_to_prompt
+from llama_index.llms.llm import LLM
 from llama_index.llms.replicate import Replicate

 OPENAI_MODELS = [

benchmarks/struct_indices/spider/evaluate.py (+1 -1)

@@ -10,8 +10,8 @@
 from tqdm import tqdm

 from llama_index.indices.struct_store.sql import SQLQueryMode, SQLStructStoreIndex
-from llama_index.llms.base import ChatMessage, MessageRole
 from llama_index.llms.openai import OpenAI
+from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.response.schema import Response

 logging.getLogger("root").setLevel(logging.WARNING)

docs/api_reference/service_context.rst (+1 -1)

@@ -6,7 +6,7 @@ Service Context
 The service context container is a utility container for LlamaIndex
 index and query classes. The container contains the following
 objects that are commonly used for configuring every index and
-query, such as the LLMPredictor (for configuring the LLM),
+query, such as the LLM,
 the PromptHelper (for configuring input size/chunk size),
 the BaseEmbedding (for configuring the embedding model), and more.
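Given the reworded docstring, here is a minimal configuration sketch of the container it describes (assuming the 0.9.x `ServiceContext.from_defaults` API; the model and chunk size are illustrative):

```py
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# The LLM now fills the slot the LLMPredictor used to occupy; chunk_size
# feeds the PromptHelper sizing mentioned in the docstring.
service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-3.5-turbo"),
    chunk_size=512,
)
```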

docs/changes/deprecated_terms.md (+1 -1)

@@ -24,7 +24,7 @@ This has been renamed to `VectorStoreIndex`, but it is only a cosmetic change. P
 ## LLMPredictor

-The `LLMPredictor` object is no longer intended to be used by users. Instead, you can setup an LLM directly and pass it into the `ServiceContext`.
+The `LLMPredictor` object is no longer intended to be used by users. Instead, you can set up an LLM directly and pass it into the `ServiceContext`. The `LLM` class itself has attributes and methods similar to the `LLMPredictor`.

 - [LLMs in LlamaIndex](/module_guides/models/llms.md)
 - [Setting LLMs in the ServiceContext](/module_guides/supporting_modules/service_context.md)
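As a hedged illustration of the parity that note describes (assuming the post-merge API; the `metadata` attribute shown is one plausible point of overlap):

```py
from llama_index import ServiceContext
from llama_index.llms import OpenAI

llm = OpenAI()
service_context = ServiceContext.from_defaults(llm=llm)

# Attributes formerly reached via service_context.llm_predictor are now
# available on the LLM itself, e.g. its model metadata.
print(service_context.llm.metadata.context_window)
```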

docs/community/faq/llms.md (+3 -3)

@@ -46,12 +46,12 @@ response = query_engine.query("Rest of your query... \nRespond in Italian")
 Alternatively:

 ```py
-from llama_index import LLMPredictor, ServiceContext
+from llama_index import ServiceContext
 from llama_index.llms import OpenAI

-llm_predictor = LLMPredictor(system_prompt="Always respond in Italian.")
+llm = OpenAI(system_prompt="Always respond in Italian.")

-service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
+service_context = ServiceContext.from_defaults(llm=llm)

 query_engine = load_index_from_storage(
     storage_context, service_context=service_context

docs/community/integrations/deepeval.md (-1)

@@ -58,7 +58,6 @@ from llama_index import (
     TreeIndex,
     VectorStoreIndex,
     SimpleDirectoryReader,
-    LLMPredictor,
     ServiceContext,
     Response,
 )

docs/examples/agent/openai_agent_query_plan.ipynb (-1)

@@ -80,7 +80,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 "    GPTVectorStoreIndex,\n",
 ")\n",

docs/examples/callbacks/HoneyHiveLlamaIndexTracer.ipynb (+8 -13)

@@ -118,16 +118,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index.callbacks import CallbackManager, CBEventType\n",
-"from llama_index.callbacks import LlamaDebugHandler, WandbCallbackHandler\n",
+"from llama_index.callbacks import CallbackManager\n",
+"from llama_index.callbacks import LlamaDebugHandler\n",
 "from llama_index import (\n",
-"    SummaryIndex,\n",
-"    GPTTreeIndex,\n",
-"    GPTVectorStoreIndex,\n",
+"    VectorStoreIndex,\n",
 "    ServiceContext,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index.indices.composability import ComposableGraph\n",

@@ -289,9 +286,7 @@
 }
 ],
 "source": [
-"index = GPTVectorStoreIndex.from_documents(\n",
-"    docs, service_context=service_context\n",
-")"
+"index = VectorStoreIndex.from_documents(docs, service_context=service_context)"
 ]
 },
 {

@@ -421,7 +416,7 @@
 ],
 "source": [
 "# build NYC index\n",
-"nyc_index = GPTVectorStoreIndex.from_documents(\n",
+"nyc_index = VectorStoreIndex.from_documents(\n",
 "    nyc_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -450,7 +445,7 @@
 ],
 "source": [
 "# build essay index\n",
-"essay_index = GPTVectorStoreIndex.from_documents(\n",
+"essay_index = VectorStoreIndex.from_documents(\n",
 "    essay_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -529,7 +524,7 @@
 "from llama_index import StorageContext, load_graph_from_storage\n",
 "\n",
 "graph = ComposableGraph.from_indices(\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    [nyc_index, essay_index],\n",
 "    index_summaries=[nyc_index_summary, essay_index_summary],\n",
 "    max_keywords_per_chunk=50,\n",

docs/examples/callbacks/WandbCallbackHandler.ipynb (+7 -12)

@@ -57,16 +57,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index.callbacks import CallbackManager, CBEventType\n",
+"from llama_index.callbacks import CallbackManager\n",
 "from llama_index.callbacks import LlamaDebugHandler, WandbCallbackHandler\n",
 "from llama_index import (\n",
-"    SummaryIndex,\n",
-"    GPTTreeIndex,\n",
-"    GPTVectorStoreIndex,\n",
+"    VectorStoreIndex,\n",
 "    ServiceContext,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index.indices.composability import ComposableGraph\n",

@@ -238,9 +235,7 @@
 }
 ],
 "source": [
-"index = GPTVectorStoreIndex.from_documents(\n",
-"    docs, service_context=service_context\n",
-")"
+"index = VectorStoreIndex.from_documents(docs, service_context=service_context)"
 ]
 },
 {

@@ -457,7 +452,7 @@
 ],
 "source": [
 "# build NYC index\n",
-"nyc_index = GPTVectorStoreIndex.from_documents(\n",
+"nyc_index = VectorStoreIndex.from_documents(\n",
 "    nyc_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -493,7 +488,7 @@
 ],
 "source": [
 "# build essay index\n",
-"essay_index = GPTVectorStoreIndex.from_documents(\n",
+"essay_index = VectorStoreIndex.from_documents(\n",
 "    essay_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -572,7 +567,7 @@
 "from llama_index import StorageContext, load_graph_from_storage\n",
 "\n",
 "graph = ComposableGraph.from_indices(\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    [nyc_index, essay_index],\n",
 "    index_summaries=[nyc_index_summary, essay_index_summary],\n",
 "    max_keywords_per_chunk=50,\n",

docs/examples/citation/pdf_page_reference.ipynb (+1 -1)

@@ -57,7 +57,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index import LLMPredictor, ServiceContext\n",
+"from llama_index import ServiceContext\n",
 "from llama_index.llms import OpenAI\n",
 "\n",
 "service_context = ServiceContext.from_defaults(\n",

docs/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb (+1 -1)

@@ -309,7 +309,7 @@
 ")\n",
 "\n",
 "decompose_transform = DecomposeQueryTransform(\n",
-"    service_context.llm_predictor, verbose=True\n",
+"    service_context.llm, verbose=True\n",
 ")"
 ]
 },

docs/examples/composable_indices/city_analysis/City_Analysis-Unified-Query.ipynb (+1 -4)

@@ -371,11 +371,8 @@
 "from llama_index.indices.query.query_transform.base import (\n",
 "    DecomposeQueryTransform,\n",
 ")\n",
-"from llama_index import LLMPredictor\n",
 "\n",
-"decompose_transform = DecomposeQueryTransform(\n",
-"    LLMPredictor(llm=chatgpt), verbose=True\n",
-")"
+"decompose_transform = DecomposeQueryTransform(llm=chatgpt, verbose=True)"
 ]
 },
 {

docs/examples/composable_indices/city_analysis/City_Analysis.ipynb (-1)

@@ -206,7 +206,6 @@
 "    SimpleKeywordTableIndex,\n",
 "    SummaryIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 ")\n",
 "from llama_index.llms import OpenAI\n",

docs/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb (-1)

@@ -67,7 +67,6 @@
 "    VectorStoreIndex,\n",
 "    SimpleKeywordTableIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 ")\n",
 "from llama_index.vector_stores import PineconeVectorStore\n",

docs/examples/composable_indices/financial_data_analysis/DeepLakeDemo-FinancialData.ipynb (+2 -3)

@@ -152,7 +152,6 @@
 "    VectorStoreIndex,\n",
 "    SimpleKeywordTableIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 "    download_loader,\n",
 "    Document,\n",

@@ -818,7 +817,7 @@
 ")\n",
 "\n",
 "decompose_transform = DecomposeQueryTransform(\n",
-"    service_context.llm_predictor, verbose=True\n",
+"    service_context.llm, verbose=True\n",
 ")"
 ]
 },

@@ -879,7 +878,7 @@
 ")\n",
 "\n",
 "decompose_transform = DecomposeQueryTransform(\n",
-"    service_context.llm_predictor, verbose=True\n",
+"    service_context.llm, verbose=True\n",
 ")"
 ]
 },

docs/examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb (-1)

@@ -51,7 +51,6 @@
 "from llama_index import (\n",
 "    VectorStoreIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 ")\n",
 "from llama_index.llms import OpenAI\n",

docs/examples/discover_llamaindex/document_management/discord_dumps/help_channel_dump_05_25_23.json (+2 -2)

@@ -79388,7 +79388,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm_predictor.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm_predictor.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
+"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
 "author": {
 "id": "937548610885791806",
 "name": "noequal",

@@ -105504,7 +105504,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`index._service_context.llm_predictor.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
+"content": "`index._service_context.llm.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
 "author": {
 "id": "334536717648265216",
 "name": "Logan M",

docs/examples/discover_llamaindex/document_management/discord_dumps/help_channel_dump_06_02_23.json (+2 -2)

@@ -79388,7 +79388,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm_predictor.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm_predictor.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
+"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
 "author": {
 "id": "937548610885791806",
 "name": "noequal",

@@ -105504,7 +105504,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`index._service_context.llm_predictor.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
+"content": "`index._service_context.llm.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
 "author": {
 "id": "334536717648265216",
 "name": "Logan M",

docs/examples/docstore/DocstoreDemo.ipynb (+1 -1)

@@ -59,7 +59,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
+"from llama_index import SimpleDirectoryReader, ServiceContext\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",
 "from llama_index.composability import ComposableGraph\n",
 "from llama_index.llms import OpenAI"

docs/examples/docstore/FirestoreDemo.ipynb (-1)

@@ -56,7 +56,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",

docs/examples/docstore/MongoDocstoreDemo.ipynb (-1)

@@ -63,7 +63,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",

docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb (-1)

@@ -91,7 +91,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",

docs/examples/evaluation/Deepeval.ipynb (-1)

@@ -100,7 +100,6 @@
 "    TreeIndex,\n",
 "    VectorStoreIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 "    Response,\n",
 ")\n",

docs/examples/evaluation/QuestionGeneration.ipynb (-1)

@@ -64,7 +64,6 @@
 "    SimpleDirectoryReader,\n",
 "    VectorStoreIndex,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    Response,\n",
 ")\n",
 "from llama_index.llms import OpenAI"
