Commit 6f3d4b5

Merge LLM + LLMPredictor, reorganize types (run-llama#9388)
1 parent aa67267 commit 6f3d4b5

File tree: 249 files changed, +2062 -1376 lines changed

CHANGELOG.md (+5)

@@ -12,6 +12,11 @@
 - Change more than one image input for Replicate Multi-modal models from error to warning (#9360)
 - Removed GPT-Licensed `aiostream` dependency (#9403)

+### Breaking Changes
+
+- Updated the base `LLM` interface to match `LLMPredictor` (#9388)
+- Deprecated `LLMPredictor` (#9388)
+
 ## [0.9.13] - 2023-12-06

 ### New Features
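The changelog names the breaking change without showing its effect on calling code. As a rough migration sketch (assuming the post-merge API; the model name, prompt text, and `PromptTemplate` import path are illustrative):

```py
# Hedged before/after sketch for the LLM + LLMPredictor merge (#9388).
from llama_index.llms import OpenAI
from llama_index.prompts import PromptTemplate

llm = OpenAI(model="gpt-3.5-turbo")  # illustrative model choice
prompt = PromptTemplate("Summarize {topic} in one sentence.")

# Before this commit, prediction went through the wrapper:
#   LLMPredictor(llm=llm).predict(prompt, topic="query engines")
# After it, the base LLM exposes the predictor-style method directly:
response = llm.predict(prompt, topic="query engines")
print(response)
```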

benchmarks/agent/agent_utils.py (+1 -1)

@@ -3,8 +3,8 @@
 from llama_index.agent import OpenAIAgent, ReActAgent
 from llama_index.agent.types import BaseAgent
 from llama_index.llms import Anthropic, OpenAI
-from llama_index.llms.base import LLM
 from llama_index.llms.llama_utils import messages_to_prompt
+from llama_index.llms.llm import LLM
 from llama_index.llms.replicate import Replicate

 OPENAI_MODELS = [

benchmarks/struct_indices/spider/evaluate.py (+1 -1)

@@ -10,8 +10,8 @@
 from tqdm import tqdm

 from llama_index.indices.struct_store.sql import SQLQueryMode, SQLStructStoreIndex
-from llama_index.llms.base import ChatMessage, MessageRole
 from llama_index.llms.openai import OpenAI
+from llama_index.llms.types import ChatMessage, MessageRole
 from llama_index.response.schema import Response

 logging.getLogger("root").setLevel(logging.WARNING)

docs/api_reference/service_context.rst (+1 -1)

@@ -6,7 +6,7 @@ Service Context
 The service context container is a utility container for LlamaIndex
 index and query classes. The container contains the following
 objects that are commonly used for configuring every index and
-query, such as the LLMPredictor (for configuring the LLM),
+query, such as the LLM,
 the PromptHelper (for configuring input size/chunk size),
 the BaseEmbedding (for configuring the embedding model), and more.
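Given the reworded docstring, here is a minimal configuration sketch of the container it describes (assuming the 0.9.x `ServiceContext.from_defaults` API; the model and chunk size are illustrative):

```py
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# The LLM now fills the slot the LLMPredictor used to occupy; chunk_size
# feeds the PromptHelper sizing mentioned in the docstring.
service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-3.5-turbo"),
    chunk_size=512,
)
```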

docs/changes/deprecated_terms.md (+1 -1)

@@ -24,7 +24,7 @@ This has been renamed to `VectorStoreIndex`, but it is only a cosmetic change. P
 ## LLMPredictor

-The `LLMPredictor` object is no longer intended to be used by users. Instead, you can setup an LLM directly and pass it into the `ServiceContext`.
+The `LLMPredictor` object is no longer intended to be used by users. Instead, you can set up an LLM directly and pass it into the `ServiceContext`. The `LLM` class itself has attributes and methods similar to the `LLMPredictor`.

 - [LLMs in LlamaIndex](/module_guides/models/llms.md)
 - [Setting LLMs in the ServiceContext](/module_guides/supporting_modules/service_context.md)
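As a hedged illustration of the parity that note describes (assuming the post-merge API; the `metadata` attribute shown is one plausible point of overlap):

```py
from llama_index import ServiceContext
from llama_index.llms import OpenAI

llm = OpenAI()
service_context = ServiceContext.from_defaults(llm=llm)

# Attributes formerly reached via service_context.llm_predictor are now
# available on the LLM itself, e.g. its model metadata.
print(service_context.llm.metadata.context_window)
```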

docs/community/faq/llms.md (+3 -3)

@@ -46,12 +46,12 @@ response = query_engine.query("Rest of your query... \nRespond in Italian")
 Alternatively:

 ```py
-from llama_index import LLMPredictor, ServiceContext
+from llama_index import ServiceContext
 from llama_index.llms import OpenAI

-llm_predictor = LLMPredictor(system_prompt="Always respond in Italian.")
+llm = OpenAI(system_prompt="Always respond in Italian.")

-service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
+service_context = ServiceContext.from_defaults(llm=llm)

 query_engine = load_index_from_storage(
     storage_context, service_context=service_context

docs/community/integrations/deepeval.md (-1)

@@ -58,7 +58,6 @@ from llama_index import (
     TreeIndex,
     VectorStoreIndex,
     SimpleDirectoryReader,
-    LLMPredictor,
     ServiceContext,
     Response,
 )

docs/examples/agent/openai_agent_query_plan.ipynb (-1)

@@ -80,7 +80,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 "    GPTVectorStoreIndex,\n",
 ")\n",

docs/examples/callbacks/HoneyHiveLlamaIndexTracer.ipynb (+8 -13)

@@ -118,16 +118,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index.callbacks import CallbackManager, CBEventType\n",
-"from llama_index.callbacks import LlamaDebugHandler, WandbCallbackHandler\n",
+"from llama_index.callbacks import CallbackManager\n",
+"from llama_index.callbacks import LlamaDebugHandler\n",
 "from llama_index import (\n",
-"    SummaryIndex,\n",
-"    GPTTreeIndex,\n",
-"    GPTVectorStoreIndex,\n",
+"    VectorStoreIndex,\n",
 "    ServiceContext,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index.indices.composability import ComposableGraph\n",

@@ -289,9 +286,7 @@
 }
 ],
 "source": [
-"index = GPTVectorStoreIndex.from_documents(\n",
-"    docs, service_context=service_context\n",
-")"
+"index = VectorStoreIndex.from_documents(docs, service_context=service_context)"
 ]
 },
 {

@@ -421,7 +416,7 @@
 ],
 "source": [
 "# build NYC index\n",
-"nyc_index = GPTVectorStoreIndex.from_documents(\n",
+"nyc_index = VectorStoreIndex.from_documents(\n",
 "    nyc_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -450,7 +445,7 @@
 ],
 "source": [
 "# build essay index\n",
-"essay_index = GPTVectorStoreIndex.from_documents(\n",
+"essay_index = VectorStoreIndex.from_documents(\n",
 "    essay_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -529,7 +524,7 @@
 "from llama_index import StorageContext, load_graph_from_storage\n",
 "\n",
 "graph = ComposableGraph.from_indices(\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    [nyc_index, essay_index],\n",
 "    index_summaries=[nyc_index_summary, essay_index_summary],\n",
 "    max_keywords_per_chunk=50,\n",

docs/examples/callbacks/WandbCallbackHandler.ipynb (+7 -12)

@@ -57,16 +57,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index.callbacks import CallbackManager, CBEventType\n",
+"from llama_index.callbacks import CallbackManager\n",
 "from llama_index.callbacks import LlamaDebugHandler, WandbCallbackHandler\n",
 "from llama_index import (\n",
-"    SummaryIndex,\n",
-"    GPTTreeIndex,\n",
-"    GPTVectorStoreIndex,\n",
+"    VectorStoreIndex,\n",
 "    ServiceContext,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index.indices.composability import ComposableGraph\n",

@@ -238,9 +235,7 @@
 }
 ],
 "source": [
-"index = GPTVectorStoreIndex.from_documents(\n",
-"    docs, service_context=service_context\n",
-")"
+"index = VectorStoreIndex.from_documents(docs, service_context=service_context)"
 ]
 },
 {

@@ -457,7 +452,7 @@
 ],
 "source": [
 "# build NYC index\n",
-"nyc_index = GPTVectorStoreIndex.from_documents(\n",
+"nyc_index = VectorStoreIndex.from_documents(\n",
 "    nyc_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -493,7 +488,7 @@
 ],
 "source": [
 "# build essay index\n",
-"essay_index = GPTVectorStoreIndex.from_documents(\n",
+"essay_index = VectorStoreIndex.from_documents(\n",
 "    essay_documents,\n",
 "    service_context=service_context,\n",
 "    storage_context=storage_context,\n",

@@ -572,7 +567,7 @@
 "from llama_index import StorageContext, load_graph_from_storage\n",
 "\n",
 "graph = ComposableGraph.from_indices(\n",
-"    GPTSimpleKeywordTableIndex,\n",
+"    SimpleKeywordTableIndex,\n",
 "    [nyc_index, essay_index],\n",
 "    index_summaries=[nyc_index_summary, essay_index_summary],\n",
 "    max_keywords_per_chunk=50,\n",

docs/examples/citation/pdf_page_reference.ipynb (+1 -1)

@@ -57,7 +57,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index import LLMPredictor, ServiceContext\n",
+"from llama_index import ServiceContext\n",
 "from llama_index.llms import OpenAI\n",
 "\n",
 "service_context = ServiceContext.from_defaults(\n",

docs/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb (+1 -1)

@@ -309,7 +309,7 @@
 ")\n",
 "\n",
 "decompose_transform = DecomposeQueryTransform(\n",
-"    service_context.llm_predictor, verbose=True\n",
+"    service_context.llm, verbose=True\n",
 ")"
 ]
 },

docs/examples/composable_indices/city_analysis/City_Analysis-Unified-Query.ipynb (+1 -4)

@@ -371,11 +371,8 @@
 "from llama_index.indices.query.query_transform.base import (\n",
 "    DecomposeQueryTransform,\n",
 ")\n",
-"from llama_index import LLMPredictor\n",
 "\n",
-"decompose_transform = DecomposeQueryTransform(\n",
-"    LLMPredictor(llm=chatgpt), verbose=True\n",
-")"
+"decompose_transform = DecomposeQueryTransform(llm=chatgpt, verbose=True)"
 ]
 },
 {

docs/examples/composable_indices/city_analysis/City_Analysis.ipynb (-1)

@@ -206,7 +206,6 @@
 "    SimpleKeywordTableIndex,\n",
 "    SummaryIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 ")\n",
 "from llama_index.llms import OpenAI\n",

docs/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb (-1)

@@ -67,7 +67,6 @@
 "    VectorStoreIndex,\n",
 "    SimpleKeywordTableIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 ")\n",
 "from llama_index.vector_stores import PineconeVectorStore\n",

docs/examples/composable_indices/financial_data_analysis/DeepLakeDemo-FinancialData.ipynb (+2 -3)

@@ -152,7 +152,6 @@
 "    VectorStoreIndex,\n",
 "    SimpleKeywordTableIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 "    download_loader,\n",
 "    Document,\n",

@@ -818,7 +817,7 @@
 ")\n",
 "\n",
 "decompose_transform = DecomposeQueryTransform(\n",
-"    service_context.llm_predictor, verbose=True\n",
+"    service_context.llm, verbose=True\n",
 ")"
 ]
 },

@@ -879,7 +878,7 @@
 ")\n",
 "\n",
 "decompose_transform = DecomposeQueryTransform(\n",
-"    service_context.llm_predictor, verbose=True\n",
+"    service_context.llm, verbose=True\n",
 ")"
 ]
 },

docs/examples/customization/llms/SimpleIndexDemo-ChatGPT.ipynb (-1)

@@ -51,7 +51,6 @@
 "from llama_index import (\n",
 "    VectorStoreIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 ")\n",
 "from llama_index.llms import OpenAI\n",

docs/examples/discover_llamaindex/document_management/discord_dumps/help_channel_dump_05_25_23.json (+2 -2)

@@ -79388,7 +79388,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm_predictor.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm_predictor.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
+"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
 "author": {
 "id": "937548610885791806",
 "name": "noequal",

@@ -105504,7 +105504,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`index._service_context.llm_predictor.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
+"content": "`index._service_context.llm.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
 "author": {
 "id": "334536717648265216",
 "name": "Logan M",

docs/examples/discover_llamaindex/document_management/discord_dumps/help_channel_dump_06_02_23.json (+2 -2)

@@ -79388,7 +79388,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm_predictor.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm_predictor.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
+"content": "`\n for cur_text_chunk in text_chunks:\n if not self._streaming:\n (\n response,\n formatted_prompt,\n ) = self._service_context.llm.predict(\n refine_template,\n context_msg=cur_text_chunk,\n )\n else:\n response, formatted_prompt = self._service_context.llm.stream(\n refine_template,\n context_msg=cur_text_chunk,\n )\n self._log_prompt_and_response(\n formatted_prompt, response, log_prefix=\"Refined\"\n )\n`\nA code snippet in refine_response_single seems to have no effect on the subsequent llm query after being split into cur_text_chunk. It appears that the response is entirely based on the result of the last text chunk, and previous chunks are essentially discarded. I feel that there may be some issues with this logic, perhaps it's just my understanding problem. I hope you can clarify this for me, thank you.",
 "author": {
 "id": "937548610885791806",
 "name": "noequal",

@@ -105504,7 +105504,7 @@
 "timestampEdited": null,
 "callEndedTimestamp": null,
 "isPinned": false,
-"content": "`index._service_context.llm_predictor.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
+"content": "`index._service_context.llm.last_token_usage()`\n\n`index._service_context.embed_model.last_token_usage()`",
 "author": {
 "id": "334536717648265216",
 "name": "Logan M",

docs/examples/docstore/DocstoreDemo.ipynb (+1 -1)

@@ -59,7 +59,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor\n",
+"from llama_index import SimpleDirectoryReader, ServiceContext\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",
 "from llama_index.composability import ComposableGraph\n",
 "from llama_index.llms import OpenAI"

docs/examples/docstore/FirestoreDemo.ipynb (-1)

@@ -56,7 +56,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",

docs/examples/docstore/MongoDocstoreDemo.ipynb (-1)

@@ -63,7 +63,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",

docs/examples/docstore/RedisDocstoreIndexStoreDemo.ipynb (-1)

@@ -91,7 +91,6 @@
 "from llama_index import (\n",
 "    SimpleDirectoryReader,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    StorageContext,\n",
 ")\n",
 "from llama_index import VectorStoreIndex, SummaryIndex, SimpleKeywordTableIndex\n",

docs/examples/evaluation/Deepeval.ipynb (-1)

@@ -100,7 +100,6 @@
 "    TreeIndex,\n",
 "    VectorStoreIndex,\n",
 "    SimpleDirectoryReader,\n",
-"    LLMPredictor,\n",
 "    ServiceContext,\n",
 "    Response,\n",
 ")\n",

docs/examples/evaluation/QuestionGeneration.ipynb (-1)

@@ -64,7 +64,6 @@
 "    SimpleDirectoryReader,\n",
 "    VectorStoreIndex,\n",
 "    ServiceContext,\n",
-"    LLMPredictor,\n",
 "    Response,\n",
 ")\n",
 "from llama_index.llms import OpenAI"
