Skip to content

Commit 28c142a

Browse files
Add embed to Index configure calls (#515)
## Problem

`embed` was never exposed as an argument for calling `configure` on `IndexResource`.

## Solution

- Add a new simple `ConfigureIndexEmbed(TypedDict)` class for representing the argument dictionary shape. I went with this because it aligned with the existing `CreateIndexForModelEmbedTypedDict`, but I'm not sure if this is best practice in the repo at this point. Maybe a class would be better.
- Update factory, sync, and async resources to pass through `embed` on `configure` calls.
- Update legacy `Pinecone.configure_index` method to support `embed`.
- Add integration tests to serverless resources to validate converting an existing serverless index to an integrated index using `configure` or `configure_index`.

## Type of Change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [X] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update
- [ ] Infrastructure change (CI configs, etc)
- [ ] Non-code change (docs, etc)
- [ ] None of the above: (explain here)

## Test Plan

New integration tests added.
You can pull this branch down and use poetry to run a repl and quickly evaluate things yourself:

```python
poetry run repl
>>> from pinecone import Pinecone
>>> pc = Pinecone(api_key="YOUR_API_KEY")
>>> pc.create_index(name="test-int-inf-convert", dimension=1024, metric="cosine", spec={"serverless": {"cloud": "aws", "region": "us-east-1"}})
{
    "name": "test-int-inf-convert",
    "metric": "cosine",
    "host": "test-int-inf-convert-bt8x3su.svc.preprod-aws-0.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1024,
    "deletion_protection": "disabled",
    "tags": null
}
>>> pc.db.index.configure(name="test-int-inf-convert", embed={"model": "multilingual-e5-large", "field_map":{"text": "chunk_text"}})
>>> pc.db.index.describe(name="test-int-inf-convert")
{
    "name": "test-int-inf-convert",
    "metric": "cosine",
    "host": "test-int-inf-convert-bt8x3su.svc.preprod-aws-0.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1024,
    "deletion_protection": "disabled",
    "tags": null,
    "embed": {
        "model": "multilingual-e5-large",
        "field_map": {
            "text": "chunk_text"
        },
        "dimension": 1024,
        "metric": "cosine",
        "write_parameters": {
            "input_type": "passage",
            "truncate": "END"
        },
        "read_parameters": {
            "input_type": "query",
            "truncate": "END"
        },
        "vector_type": "dense"
    }
}
### repeat with async resources / pc.configure_index()
```

---

- To see the specific tasks where the Asana app for GitHub is being used, see below:
  - https://app.asana.com/0/0/1210417294961252
1 parent dfd0125 commit 28c142a

File tree

16 files changed

+160
-11
lines changed

16 files changed

+160
-11
lines changed

pinecone/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@
9898
"RestoreJobList": ("pinecone.db_control.models", "RestoreJobList"),
9999
"BackupModel": ("pinecone.db_control.models", "BackupModel"),
100100
"BackupList": ("pinecone.db_control.models", "BackupList"),
101+
"ConfigureIndexEmbed": ("pinecone.db_control.types", "ConfigureIndexEmbed"),
102+
"CreateIndexForModelEmbedTypedDict": (
103+
"pinecone.db_control.types",
104+
"CreateIndexForModelEmbedTypedDict",
105+
),
101106
}
102107

103108
_config_lazy_imports = {

pinecone/__init__.pyi

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ from pinecone.db_control.models import (
7878
PodSpec,
7979
PodSpecDefinition,
8080
)
81+
from pinecone.db_control.types import (
82+
ConfigureIndexEmbed,
83+
CreateIndexForModelEmbedTypedDict,
84+
)
8185
from pinecone.pinecone import Pinecone
8286
from pinecone.pinecone_asyncio import PineconeAsyncio
8387

@@ -160,4 +164,7 @@ __all__ = [
160164
"ServerlessSpecDefinition",
161165
"PodSpec",
162166
"PodSpecDefinition",
167+
# Control plane types
168+
"ConfigureIndexEmbed",
169+
"CreateIndexForModelEmbedTypedDict",
163170
]

pinecone/db_control/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .enums import *
22
from .models import *
3+
from .types import *
34
from .db_control import DBControl
45
from .db_control_asyncio import DBControlAsyncio
56
from .repr_overrides import install_repr_overrides
@@ -30,6 +31,9 @@
3031
"BackupList",
3132
"RestoreJobModel",
3233
"RestoreJobList",
34+
# from .types
35+
"ConfigureIndexEmbed",
36+
"CreateIndexForModelEmbedTypedDict",
3337
# direct imports
3438
"DBControl",
3539
"DBControlAsyncio",

pinecone/db_control/request_factory.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from pinecone.core.openapi.db_control.model.configure_index_request_spec_pod import (
2020
ConfigureIndexRequestSpecPod,
2121
)
22+
from pinecone.core.openapi.db_control.model.configure_index_request_embed import (
23+
ConfigureIndexRequestEmbed,
24+
)
2225
from pinecone.core.openapi.db_control.model.deletion_protection import (
2326
DeletionProtection as DeletionProtectionModel,
2427
)
@@ -45,7 +48,7 @@
4548
GcpRegion,
4649
AzureRegion,
4750
)
48-
from .types import CreateIndexForModelEmbedTypedDict
51+
from .types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed
4952

5053

5154
logger = logging.getLogger(__name__)
@@ -241,6 +244,7 @@ def configure_index_request(
241244
pod_type: Optional[Union[PodType, str]] = None,
242245
deletion_protection: Optional[Union[DeletionProtection, str]] = None,
243246
tags: Optional[Dict[str, str]] = None,
247+
embed: Optional[Union[ConfigureIndexEmbed, Dict]] = None,
244248
):
245249
if deletion_protection is None:
246250
dp = DeletionProtectionModel(description.deletion_protection)
@@ -271,13 +275,24 @@ def configure_index_request(
271275
if replicas:
272276
pod_config_args.update(replicas=replicas)
273277

274-
if pod_config_args != {}:
278+
embed_config = None
279+
if embed is not None:
280+
embed_config = ConfigureIndexRequestEmbed(**dict(embed))
281+
282+
spec = None
283+
if pod_config_args:
275284
spec = ConfigureIndexRequestSpec(pod=ConfigureIndexRequestSpecPod(**pod_config_args))
276-
req = ConfigureIndexRequest(deletion_protection=dp, spec=spec, tags=IndexTags(**tags))
277-
else:
278-
req = ConfigureIndexRequest(deletion_protection=dp, tags=IndexTags(**tags))
279285

280-
return req
286+
args_dict = parse_non_empty_args(
287+
[
288+
("deletion_protection", dp),
289+
("tags", IndexTags(**tags)),
290+
("spec", spec),
291+
("embed", embed_config),
292+
]
293+
)
294+
295+
return ConfigureIndexRequest(**args_dict)
281296

282297
@staticmethod
283298
def create_collection_request(name: str, source: str) -> CreateCollectionRequest:

pinecone/db_control/resources/asyncio/index.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pinecone.db_control.request_factory import PineconeDBControlRequestFactory
2828
from pinecone.core.openapi.db_control import API_VERSION
2929
from pinecone.utils import require_kwargs
30+
from pinecone.db_control.types.configure_index_embed import ConfigureIndexEmbed
3031

3132
logger = logging.getLogger(__name__)
3233
""" :meta private: """
@@ -183,6 +184,7 @@ async def configure(
183184
pod_type: Optional[Union[PodType, str]] = None,
184185
deletion_protection: Optional[Union[DeletionProtection, str]] = None,
185186
tags: Optional[Dict[str, str]] = None,
187+
embed: Optional[Union[ConfigureIndexEmbed, Dict]] = None,
186188
):
187189
description = await self.describe(name=name)
188190

@@ -192,5 +194,6 @@ async def configure(
192194
pod_type=pod_type,
193195
deletion_protection=deletion_protection,
194196
tags=tags,
197+
embed=embed,
195198
)
196199
await self._index_api.configure_index(name, configure_index_request=req)

pinecone/db_control/resources/sync/index.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
1111
from pinecone.db_control.request_factory import PineconeDBControlRequestFactory
1212
from pinecone.core.openapi.db_control import API_VERSION
13+
from pinecone.db_control.types.configure_index_embed import ConfigureIndexEmbed
1314

1415
logger = logging.getLogger(__name__)
1516
""" :meta private: """
@@ -224,6 +225,7 @@ def configure(
224225
pod_type: Optional[Union["PodType", str]] = None,
225226
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
226227
tags: Optional[Dict[str, str]] = None,
228+
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
227229
) -> None:
228230
api_instance = self._index_api
229231
description = self.describe(name=name)
@@ -234,6 +236,7 @@ def configure(
234236
pod_type=pod_type,
235237
deletion_protection=deletion_protection,
236238
tags=tags,
239+
embed=embed,
237240
)
238241
api_instance.configure_index(name, configure_index_request=req)
239242

pinecone/db_control/types/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .create_index_for_model_embed import CreateIndexForModelEmbedTypedDict
2+
from .configure_index_embed import ConfigureIndexEmbed
23

3-
__all__ = ["CreateIndexForModelEmbedTypedDict"]
4+
__all__ = ["CreateIndexForModelEmbedTypedDict", "ConfigureIndexEmbed"]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from typing import TypedDict, Dict, Any, Optional
2+
3+
4+
class ConfigureIndexEmbed(TypedDict):
5+
model: str
6+
field_map: Dict[str, str]
7+
read_parameters: Optional[Dict[str, Any]]
8+
write_parameters: Optional[Dict[str, Any]]

pinecone/legacy_pinecone_interface.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
GcpRegion,
2727
AzureRegion,
2828
)
29-
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
29+
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed
3030

3131

3232
class LegacyPineconeDBControlInterface(ABC):
@@ -438,6 +438,7 @@ def configure_index(
438438
pod_type: Optional[Union["PodType", str]] = None,
439439
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
440440
tags: Optional[Dict[str, str]] = None,
441+
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
441442
):
442443
"""
443444
:param name: the name of the Index
@@ -452,6 +453,10 @@ def configure_index(
452453
:type deletion_protection: str or DeletionProtection, optional
453454
:param tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed.
454455
:type tags: Dict[str, str], optional
456+
:param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map.
457+
The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model.
458+
You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed.
459+
:type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional
455460
456461
This method is used to modify an index's configuration. It can be used to:
457462

pinecone/pinecone.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pinecone.db_data import _Index as Index, _IndexAsyncio as IndexAsyncio
1919
from pinecone.db_control.index_host_store import IndexHostStore
2020
from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
21-
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
21+
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed
2222
from pinecone.db_control.enums import (
2323
Metric,
2424
VectorType,
@@ -399,13 +399,15 @@ def configure_index(
399399
pod_type: Optional[Union["PodType", str]] = None,
400400
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
401401
tags: Optional[Dict[str, str]] = None,
402+
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
402403
):
403404
return self.db.index.configure(
404405
name=name,
405406
replicas=replicas,
406407
pod_type=pod_type,
407408
deletion_protection=deletion_protection,
408409
tags=tags,
410+
embed=embed,
409411
)
410412

411413
def create_collection(self, name: str, source: str) -> None:

pinecone/pinecone_asyncio.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .pinecone import check_realistic_host
1111

1212
if TYPE_CHECKING:
13-
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
13+
from pinecone.db_control.types import ConfigureIndexEmbed, CreateIndexForModelEmbedTypedDict
1414
from pinecone.db_data import _IndexAsyncio
1515
from pinecone.db_control.enums import (
1616
Metric,
@@ -273,13 +273,15 @@ async def configure_index(
273273
pod_type: Optional[Union["PodType", str]] = None,
274274
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
275275
tags: Optional[Dict[str, str]] = None,
276+
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
276277
):
277278
return await self.db.index.configure(
278279
name=name,
279280
replicas=replicas,
280281
pod_type=pod_type,
281282
deletion_protection=deletion_protection,
282283
tags=tags,
284+
embed=embed,
283285
)
284286

285287
async def create_collection(self, name: str, source: str):

pinecone/pinecone_interface_asyncio.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
GcpRegion,
3131
AzureRegion,
3232
)
33-
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
33+
from pinecone.db_control.types import ConfigureIndexEmbed, CreateIndexForModelEmbedTypedDict
3434

3535

3636
class PineconeAsyncioDBControlInterface(ABC):
@@ -711,6 +711,7 @@ async def configure_index(
711711
pod_type: Optional[Union["PodType", str]] = None,
712712
deletion_protection: Optional[Union["DeletionProtection", str]] = None,
713713
tags: Optional[Dict[str, str]] = None,
714+
embed: Optional[Union["ConfigureIndexEmbed", Dict]] = None,
714715
):
715716
"""
716717
:param: name: the name of the Index
@@ -719,6 +720,10 @@ async def configure_index(
719720
available pod types, please see `Understanding Indexes <https://docs.pinecone.io/docs/indexes>`_
720721
:param: deletion_protection: If set to 'enabled', the index cannot be deleted. If 'disabled', the index can be deleted.
721722
:param: tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed.
723+
:param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map.
724+
The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model.
725+
You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed.
726+
:type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional
722727
723728
This method is used to modify an index's configuration. It can be used to:
724729

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,7 @@ docstring-code-line-length = "dynamic"
166166

167167
# E712 Allow == comparison to True/False
168168
"tests/**" = ["E712"]
169+
170+
[tool.black]
171+
line-length = 100
172+
target-version = ["py39"]

tests/integration/control/resources/index/test_configure.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,30 @@ def test_remove_multiple_tags(self, pc, ready_sl_index):
4141
assert found_tags is not None
4242
assert found_tags.get("foo", None) is None, "foo should be removed"
4343
assert found_tags.get("bar", None) is None, "bar should be removed"
44+
45+
def test_configure_index_embed(self, pc, create_index_params):
46+
name = create_index_params["name"]
47+
create_index_params["dimension"] = 1024
48+
pc.db.index.create(**create_index_params)
49+
desc = pc.db.index.describe(name=name)
50+
assert desc.embed is None
51+
52+
embed_config = {
53+
"model": "multilingual-e5-large",
54+
"field_map": {"text": "chunk_text"},
55+
}
56+
pc.db.index.configure(name=name, embed=embed_config)
57+
58+
desc = pc.db.index.describe(name=name)
59+
assert desc.embed.model == "multilingual-e5-large"
60+
assert desc.embed.field_map == {"text": "chunk_text"}
61+
assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
62+
assert desc.embed.write_parameters == {
63+
"input_type": "passage",
64+
"truncate": "END",
65+
}
66+
assert desc.embed.vector_type == "dense"
67+
assert desc.embed.dimension == 1024
68+
assert desc.embed.metric == "cosine"
69+
70+
pc.db.index.delete(name=name)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
class TestConfigureIndexEmbed:
2+
def test_convert_index_to_integrated(self, client, create_sl_index_params):
3+
name = create_sl_index_params["name"]
4+
create_sl_index_params["dimension"] = 1024
5+
client.create_index(**create_sl_index_params)
6+
desc = client.describe_index(name)
7+
assert desc.embed is None
8+
9+
embed_config = {
10+
"model": "multilingual-e5-large",
11+
"field_map": {"text": "chunk_text"},
12+
}
13+
client.configure_index(name, embed=embed_config)
14+
15+
desc = client.describe_index(name)
16+
assert desc.embed.model == "multilingual-e5-large"
17+
assert desc.embed.field_map == {"text": "chunk_text"}
18+
assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
19+
assert desc.embed.write_parameters == {
20+
"input_type": "passage",
21+
"truncate": "END",
22+
}
23+
assert desc.embed.vector_type == "dense"
24+
assert desc.embed.dimension == 1024
25+
assert desc.embed.metric == "cosine"
26+
27+
client.delete_index(name)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from pinecone import PineconeAsyncio
2+
3+
4+
class TestConfigureIndexEmbed:
5+
async def test_convert_index_to_integrated(self, create_sl_index_params):
6+
pc = PineconeAsyncio()
7+
name = create_sl_index_params["name"]
8+
create_sl_index_params["dimension"] = 1024
9+
await pc.create_index(**create_sl_index_params)
10+
desc = await pc.describe_index(name)
11+
assert desc.embed is None
12+
13+
embed_config = {
14+
"model": "multilingual-e5-large",
15+
"field_map": {"text": "chunk_text"},
16+
}
17+
await pc.configure_index(name, embed=embed_config)
18+
19+
desc = await pc.describe_index(name)
20+
assert desc.embed.model == "multilingual-e5-large"
21+
assert desc.embed.field_map == {"text": "chunk_text"}
22+
assert desc.embed.read_parameters == {"input_type": "query", "truncate": "END"}
23+
assert desc.embed.write_parameters == {
24+
"input_type": "passage",
25+
"truncate": "END",
26+
}
27+
assert desc.embed.vector_type == "dense"
28+
assert desc.embed.dimension == 1024
29+
assert desc.embed.metric == "cosine"
30+
31+
await pc.delete_index(name)

0 commit comments

Comments
 (0)