Merge branch 'main' into 0.6.0

abrookins · abrookins · commit dca23267092d · 2025-04-18T10:36:49.000-07:00
diff --git a/redisvl/exceptions.py b/redisvl/exceptions.py
@@ -30,3 +30,9 @@ def __init__(self, message, index=None):
         if index is not None:
             message = f"Validation failed for object at index {index}: {message}"
         super().__init__(message)
+
+
+class QueryValidationError(RedisVLError):
+    """Raised when a query does not pass validation.
+
+    Carries no state of its own beyond what ``RedisVLError`` provides; the
+    message passed at construction describes why the query was rejected.
+    """
diff --git a/redisvl/index/index.py b/redisvl/index/index.py
@@ -18,6 +18,7 @@
     Union,
 )
 
+from redisvl.query.query import VectorQuery
 from redisvl.redis.utils import convert_bytes, make_dict
 from redisvl.utils.utils import deprecated_argument, deprecated_function, sync_wrapper
 
@@ -34,6 +35,7 @@
 from redis.commands.search.indexDefinition import IndexDefinition
 
 from redisvl.exceptions import (
+    QueryValidationError,
     RedisModuleVersionError,
     RedisSearchError,
     RedisVLError,
@@ -46,16 +48,18 @@
     BaseVectorQuery,
     CountQuery,
     FilterQuery,
-    HybridQuery,
 )
 from redisvl.query.filter import FilterExpression
 from redisvl.redis.connection import (
     RedisConnectionFactory,
     convert_index_info_to_schema,
 )
-from redisvl.redis.utils import convert_bytes
 from redisvl.schema import IndexSchema, StorageType
-from redisvl.schema.fields import VECTOR_NORM_MAP, VectorDistanceMetric
+from redisvl.schema.fields import (
+    VECTOR_NORM_MAP,
+    VectorDistanceMetric,
+    VectorIndexAlgorithm,
+)
 from redisvl.utils.log import get_logger
 
 logger = get_logger(__name__)
@@ -194,6 +198,15 @@ def _storage(self) -> BaseStorage:
             index_schema=self.schema
         )
 
+    def _validate_query(self, query: BaseQuery) -> None:
+        """Check that a query is compatible with this index's schema.
+
+        Only ``VectorQuery`` instances are inspected; any other query type
+        passes through unchecked.
+
+        Args:
+            query (BaseQuery): The query that is about to be executed.
+
+        Raises:
+            QueryValidationError: If the query's vector field is not defined
+                in the schema, or if ``ef_runtime`` is set while the field's
+                algorithm is not HNSW.
+        """
+        if not isinstance(query, VectorQuery):
+            return
+
+        field_name = query._vector_field_name
+        field = self.schema.fields.get(field_name)
+        if field is None:
+            # Report an unknown field as a validation failure rather than
+            # letting a bare KeyError escape to the caller.
+            raise QueryValidationError(
+                f"Vector field '{field_name}' is not defined in the index schema."
+            )
+
+        if query.ef_runtime and field.attrs.algorithm != VectorIndexAlgorithm.HNSW:  # type: ignore
+            raise QueryValidationError(
+                "Vector field using 'flat' algorithm does not support EF_RUNTIME query parameter."
+            )
+
     @property
     def name(self) -> str:
         """The name of the Redis search index."""
@@ -837,6 +850,10 @@ def batch_query(
 
     def _query(self, query: BaseQuery) -> List[Dict[str, Any]]:
         """Execute a query and process results."""
+        # Validate before calling search so a rejected query never reaches it.
+        try:
+            self._validate_query(query)
+        except QueryValidationError as e:
+            # Re-raise the same exception type with an "Invalid query: " prefix,
+            # chaining the original error as the cause.
+            raise QueryValidationError(f"Invalid query: {str(e)}") from e
         results = self.search(query.query, query_params=query.params)
         return process_results(results, query=query, schema=self.schema)
 
@@ -1401,7 +1418,8 @@ async def _aggregate(
     ) -> List[Dict[str, Any]]:
         """Execute an aggregation query and processes the results."""
         results = await self.aggregate(
-            aggregation_query, query_params=aggregation_query.params  # type: ignore[attr-defined]
+            aggregation_query,
+            query_params=aggregation_query.params,  # type: ignore[attr-defined]
         )
         return process_aggregate_results(
             results,
@@ -1529,6 +1547,10 @@ async def batch_query(
 
     async def _query(self, query: BaseQuery) -> List[Dict[str, Any]]:
         """Asynchronously execute a query and process results."""
+        # Validate before awaiting search so a rejected query never reaches it.
+        try:
+            self._validate_query(query)
+        except QueryValidationError as e:
+            # Re-raise the same exception type with an "Invalid query: " prefix,
+            # chaining the original error as the cause.
+            raise QueryValidationError(f"Invalid query: {str(e)}") from e
         results = await self.search(query.query, query_params=query.params)
         return process_results(results, query=query, schema=self.schema)
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -4,7 +4,9 @@
 import pytest
 from testcontainers.compose import DockerCompose
 
+from redisvl.index.index import AsyncSearchIndex, SearchIndex
 from redisvl.redis.connection import RedisConnectionFactory
+from redisvl.redis.utils import array_to_buffer
 from redisvl.utils.vectorize import HFTextVectorizer
 
 
@@ -191,3 +193,211 @@ def pytest_collection_modifyitems(
     for item in items:
         if item.get_closest_marker("requires_api_keys"):
             item.add_marker(skip_api)
+
+
+@pytest.fixture
+def flat_index(sample_data, redis_url):
+    """
+    Yield a loaded SearchIndex whose vector field uses the "flat" algorithm.
+
+    The index is created, filled from ``sample_data``, handed to the test,
+    and deleted with ``drop=True`` once the test is done.
+    """
+    schema = {
+        "index": {
+            "name": "user_index",
+            "prefix": "v1",
+            "storage_type": "hash",
+        },
+        "fields": [
+            {"name": "description", "type": "text"},
+            {"name": "credit_score", "type": "tag"},
+            {"name": "job", "type": "text"},
+            {"name": "age", "type": "numeric"},
+            {"name": "last_updated", "type": "numeric"},
+            {"name": "location", "type": "geo"},
+            {
+                "name": "user_embedding",
+                "type": "vector",
+                "attrs": {
+                    "dims": 3,
+                    "distance_metric": "cosine",
+                    "algorithm": "flat",
+                    "datatype": "float32",
+                },
+            },
+        ],
+    }
+    index = SearchIndex.from_dict(schema, redis_url=redis_url)
+
+    # create the index before any data is loaded
+    index.create(overwrite=True)
+
+    def encode_embedding(record: dict) -> dict:
+        # pass the embedding through array_to_buffer as float32; other keys are kept
+        encoded = array_to_buffer(record["user_embedding"], "float32")
+        return {**record, "user_embedding": encoded}
+
+    index.load(sample_data, preprocess=encode_embedding)
+
+    # hand the populated index to the test
+    yield index
+
+    # teardown: remove the index, passing drop=True
+    index.delete(drop=True)
+
+
+@pytest.fixture
+async def async_flat_index(sample_data, redis_url):
+    """
+    Yield a loaded AsyncSearchIndex whose vector field uses the "flat" algorithm.
+
+    The index is created, filled from ``sample_data``, handed to the test,
+    and deleted with ``drop=True`` once the test is done.
+    """
+    schema = {
+        "index": {
+            "name": "user_index",
+            "prefix": "v1",
+            "storage_type": "hash",
+        },
+        "fields": [
+            {"name": "description", "type": "text"},
+            {"name": "credit_score", "type": "tag"},
+            {"name": "job", "type": "text"},
+            {"name": "age", "type": "numeric"},
+            {"name": "last_updated", "type": "numeric"},
+            {"name": "location", "type": "geo"},
+            {
+                "name": "user_embedding",
+                "type": "vector",
+                "attrs": {
+                    "dims": 3,
+                    "distance_metric": "cosine",
+                    "algorithm": "flat",
+                    "datatype": "float32",
+                },
+            },
+        ],
+    }
+    index = AsyncSearchIndex.from_dict(schema, redis_url=redis_url)
+
+    # create the index before any data is loaded
+    await index.create(overwrite=True)
+
+    def encode_embedding(record: dict) -> dict:
+        # pass the embedding through array_to_buffer as float32; other keys are kept
+        encoded = array_to_buffer(record["user_embedding"], "float32")
+        return {**record, "user_embedding": encoded}
+
+    await index.load(sample_data, preprocess=encode_embedding)
+
+    # hand the populated index to the test
+    yield index
+
+    # teardown: remove the index, passing drop=True
+    await index.delete(drop=True)
+
+
+@pytest.fixture
+async def async_hnsw_index(sample_data, redis_url):
+    """
+    A fixture that uses the "hnsw" algorithm for its vector field.
+
+    The index is created, loaded with ``sample_data``, yielded to the test,
+    and deleted with ``drop=True`` afterwards so nothing is left behind
+    under the shared "user_index" name and "v1" prefix.
+    """
+    index = AsyncSearchIndex.from_dict(
+        {
+            "index": {
+                "name": "user_index",
+                "prefix": "v1",
+                "storage_type": "hash",
+            },
+            "fields": [
+                {"name": "description", "type": "text"},
+                {"name": "credit_score", "type": "tag"},
+                {"name": "job", "type": "text"},
+                {"name": "age", "type": "numeric"},
+                {"name": "last_updated", "type": "numeric"},
+                {"name": "location", "type": "geo"},
+                {
+                    "name": "user_embedding",
+                    "type": "vector",
+                    "attrs": {
+                        "dims": 3,
+                        "distance_metric": "cosine",
+                        "algorithm": "hnsw",
+                        "datatype": "float32",
+                    },
+                },
+            ],
+        },
+        redis_url=redis_url,
+    )
+
+    # create the index (no data yet)
+    await index.create(overwrite=True)
+
+    # Prepare and load the data
+    def hash_preprocess(item: dict) -> dict:
+        return {
+            **item,
+            "user_embedding": array_to_buffer(item["user_embedding"], "float32"),
+        }
+
+    await index.load(sample_data, preprocess=hash_preprocess)
+
+    # run the test
+    yield index
+
+    # clean up, matching the teardown of the flat-index fixtures
+    await index.delete(drop=True)
+
+
+@pytest.fixture
+def hnsw_index(sample_data, redis_url):
+    """
+    A fixture that uses the "hnsw" algorithm for its vector field.
+
+    The index is created, loaded with ``sample_data``, yielded to the test,
+    and deleted with ``drop=True`` afterwards so nothing is left behind
+    under the shared "user_index" name and "v1" prefix.
+    """
+    index = SearchIndex.from_dict(
+        {
+            "index": {
+                "name": "user_index",
+                "prefix": "v1",
+                "storage_type": "hash",
+            },
+            "fields": [
+                {"name": "description", "type": "text"},
+                {"name": "credit_score", "type": "tag"},
+                {"name": "job", "type": "text"},
+                {"name": "age", "type": "numeric"},
+                {"name": "last_updated", "type": "numeric"},
+                {"name": "location", "type": "geo"},
+                {
+                    "name": "user_embedding",
+                    "type": "vector",
+                    "attrs": {
+                        "dims": 3,
+                        "distance_metric": "cosine",
+                        "algorithm": "hnsw",
+                        "datatype": "float32",
+                    },
+                },
+            ],
+        },
+        redis_url=redis_url,
+    )
+
+    # create the index (no data yet)
+    index.create(overwrite=True)
+
+    # Prepare and load the data
+    def hash_preprocess(item: dict) -> dict:
+        return {
+            **item,
+            "user_embedding": array_to_buffer(item["user_embedding"], "float32"),
+        }
+
+    index.load(sample_data, preprocess=hash_preprocess)
+
+    # run the test
+    yield index
+
+    # clean up, matching the teardown of the flat-index fixtures
+    index.delete(drop=True)
diff --git a/tests/integration/test_async_search_index.py b/tests/integration/test_async_search_index.py
@@ -5,12 +5,18 @@
 from redis import Redis as SyncRedis
 from redis.asyncio import Redis as AsyncRedis
 
-from redisvl.exceptions import RedisModuleVersionError, RedisSearchError, RedisVLError
+from redisvl.exceptions import (
+    QueryValidationError,
+    RedisModuleVersionError,
+    RedisSearchError,
+    RedisVLError,
+)
 from redisvl.index import AsyncSearchIndex
 from redisvl.query import VectorQuery
 from redisvl.query.query import FilterQuery
 from redisvl.redis.utils import convert_bytes
 from redisvl.schema import IndexSchema, StorageType
+from redisvl.schema.fields import VectorIndexAlgorithm
 
 fields = [{"name": "test", "type": "tag"}]
 
@@ -614,3 +620,41 @@ async def test_async_search_index_expire_keys(async_index):
         ttl = await client.ttl(key)
         assert ttl > 0
         assert ttl <= 30
+
+
+@pytest.mark.asyncio
+async def test_search_index_validates_query_with_flat_algorithm(
+    async_flat_index, sample_data
+):
+    """A vector query that sets ef_runtime is rejected by a flat-algorithm index."""
+    vector_field = async_flat_index.schema.fields["user_embedding"]
+    assert vector_field.attrs.algorithm == VectorIndexAlgorithm.FLAT
+
+    wanted_fields = ["user", "credit_score", "age", "job", "location"]
+    ef_runtime_query = VectorQuery(
+        [0.1, 0.1, 0.5],
+        "user_embedding",
+        return_fields=wanted_fields,
+        num_results=7,
+        ef_runtime=100,
+    )
+
+    with pytest.raises(QueryValidationError):
+        await async_flat_index.query(ef_runtime_query)
+
+
+@pytest.mark.asyncio
+async def test_search_index_validates_query_with_hnsw_algorithm(
+    async_hnsw_index, sample_data
+):
+    """A vector query that sets ef_runtime runs without error on an HNSW index."""
+    vector_field = async_hnsw_index.schema.fields["user_embedding"]
+    assert vector_field.attrs.algorithm == VectorIndexAlgorithm.HNSW
+
+    wanted_fields = ["user", "credit_score", "age", "job", "location"]
+    ef_runtime_query = VectorQuery(
+        [0.1, 0.1, 0.5],
+        "user_embedding",
+        return_fields=wanted_fields,
+        num_results=7,
+        ef_runtime=100,
+    )
+
+    # Should not raise
+    await async_hnsw_index.query(ef_runtime_query)
diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py
diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py