Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llama-index-core/llama_index/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ class BaseNode(BaseComponent):
id_: str = Field(
default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the node."
)
embedding: Optional[List[float]] = Field(
embedding: Optional[List[Union[float, int]]] = Field(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure how I feel about changing this, but I guess it makes sense. This will likely cause some mypy errors, though. Let's see the linting output.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a low-level change that I wasn't expecting to make. Any other change to the core schema (for example, adding an embedding_type field and switching based on that) would be a more considerable breaking change.

Do you have any thoughts on how we should proceed? I suspect we'll see more need to add byte or binary support in the future.

default=None, description="Embedding of the node."
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@
)
MATCH_ALL_QUERY = {"match_all": {}} # type: Dict

# Supported values for the ``data_type`` argument of the vector client.
VALID_DATA_TYPES = ["float", "byte", "binary"]
INVALID_DATA_TYPE = f"Data type must be one of {VALID_DATA_TYPES}"

# Byte vectors: engines accepted when ``data_type == "byte"``.
BYTE_VECTOR_ENGINES = ["lucene", "faiss"]
INVALID_BYTE_VECTOR_ENGINE = "Byte vectors only support 'lucene' or 'faiss' as the engine type."

# Binary vectors: the single accepted engine, plus the messages used when the
# engine or the space type does not satisfy the binary-vector constraints.
BINARY_VECTOR_ENGINE = "faiss"
INVALID_BINARY_ENGINE = "Binary vectors must use 'faiss' as the engine type"
INVALID_BINARY_SPACE_TYPE = "Binary vectors must use 'hamming' as the space type"


class OpensearchVectorClient:
"""
Expand All @@ -48,18 +58,12 @@ class OpensearchVectorClient:
embedding_field (str): Name of the field in the index to store
embedding array in.
text_field (str): Name of the field to grab text from
data_type (str): Type of vector data. One of ["float", "byte", "binary"]
method (Optional[dict]): Opensearch "method" JSON obj for configuring
the KNN index.
This includes engine, metric, and other config params. Defaults to:
{"name": "hnsw", "space_type": "l2", "engine": "nmslib",
"parameters": {"ef_construction": 256, "m": 48}}
settings: Optional[dict]: Settings for the Opensearch index creation. Defaults to:
{"index": {"knn": True, "knn.algo_param.ef_search": 100}}
space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py)
os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py)
**kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.

This includes engine, metric, and other config params.
space_type (Optional[str]): space type for distance metric calculation.
**kwargs: Optional arguments passed to the OpenSearch client.
"""

def __init__(
Expand All @@ -69,6 +73,7 @@ def __init__(
dim: int,
embedding_field: str = "embedding",
text_field: str = "content",
data_type: str = "float",
method: Optional[dict] = None,
settings: Optional[dict] = None,
engine: Optional[str] = "nmslib",
Expand All @@ -80,10 +85,26 @@ def __init__(
**kwargs: Any,
):
"""Init params."""
if method is not None:
engine = method.get("engine", engine)
space_type = method.get("space_type", space_type)

if data_type not in VALID_DATA_TYPES:
raise ValueError(INVALID_DATA_TYPE)

if data_type == "byte" and engine not in BYTE_VECTOR_ENGINES:
raise ValueError(INVALID_BYTE_VECTOR_ENGINE)

if data_type == "binary":
if engine != BINARY_VECTOR_ENGINE:
raise ValueError(INVALID_BINARY_ENGINE)
if space_type != "hamming":
raise ValueError(INVALID_BINARY_SPACE_TYPE)
# Default method configuration
if method is None:
method = {
"name": "hnsw",
"space_type": "l2",
"space_type": space_type,
"engine": engine,
"parameters": {"ef_construction": 256, "m": 48},
}
Expand All @@ -99,6 +120,7 @@ def __init__(
self._index = index
self._text_field = text_field
self._max_chunk_bytes = max_chunk_bytes
self._data_type = data_type

self._search_pipeline = search_pipeline
http_auth = kwargs.get("http_auth")
Expand All @@ -112,6 +134,7 @@ def __init__(
embedding_field: {
"type": "knn_vector",
"dimension": dim,
"data_type": data_type,
"method": method,
},
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-opensearch"
readme = "README.md"
version = "0.5.2"
version = "0.6.0"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down
Loading
Loading