Skip to content

Commit 59eda98

Browse files
google-genai-bot and copybara-github
authored and committed
feat: add SpannerVectorStore for orchestrating and providing utility functions for a Spanner vector store
PiperOrigin-RevId: 854392465
1 parent 8fb2be2 commit 59eda98

File tree

4 files changed

+1121
-14
lines changed

4 files changed

+1121
-14
lines changed

src/google/adk/features/_feature_registry.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class FeatureName(str, Enum):
4141
PUBSUB_TOOLSET = "PUBSUB_TOOLSET"
4242
SPANNER_TOOLSET = "SPANNER_TOOLSET"
4343
SPANNER_TOOL_SETTINGS = "SPANNER_TOOL_SETTINGS"
44+
SPANNER_VECTOR_STORE = "SPANNER_VECTOR_STORE"
4445
TOOL_CONFIG = "TOOL_CONFIG"
4546
TOOL_CONFIRMATION = "TOOL_CONFIRMATION"
4647

@@ -120,6 +121,9 @@ class FeatureConfig:
120121
FeatureName.SPANNER_TOOL_SETTINGS: FeatureConfig(
121122
FeatureStage.EXPERIMENTAL, default_on=True
122123
),
124+
FeatureName.SPANNER_VECTOR_STORE: FeatureConfig(
125+
FeatureStage.EXPERIMENTAL, default_on=True
126+
),
123127
FeatureName.TOOL_CONFIG: FeatureConfig(
124128
FeatureStage.EXPERIMENTAL, default_on=True
125129
),

src/google/adk/tools/spanner/settings.py

Lines changed: 117 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,74 @@ class QueryResultMode(Enum):
5555
"""
5656

5757

58+
class TableColumn(BaseModel):
59+
"""Represents a column configuration, used as part of the CREATE TABLE DDL statement when setting up a new vector store table."""
60+
61+
name: str
62+
"""Required. The name of the column."""
63+
64+
type: str
65+
"""Required. The type of the column.
66+
67+
For example,
68+
69+
- GoogleSQL: 'STRING(MAX)', 'INT64', 'FLOAT64', 'BOOL', etc.
70+
- PostgreSQL: 'text', 'int8', 'float8', 'boolean', etc.
71+
"""
72+
73+
is_nullable: bool = True
74+
"""Optional. Whether the column is nullable. By default, the column is nullable."""
75+
76+
77+
class VectorSearchIndexSettings(BaseModel):
78+
"""Settings for the index for use with Approximate Nearest Neighbor (ANN) vector similarity search."""
79+
80+
index_name: str
81+
"""Required. The name of the vector similarity search index."""
82+
83+
additional_key_columns: Optional[list[str]] = None
84+
"""Optional. The list of the additional key column names in the vector similarity search index.
85+
86+
To further speed up filtering for highly selective filtering columns, organize
87+
them as additional keys in the vector index after the embedding column.
88+
For example: `category` as additional key column.
89+
`CREATE VECTOR INDEX ON documents(embedding, category);`
90+
"""
91+
92+
additional_storing_columns: Optional[list[str]] = None
93+
"""Optional. The list of the storing column names in the vector similarity search index.
94+
95+
This enables filtering while walking the vector index, removing unqualified
96+
rows early.
97+
For example: `category` as storing column.
98+
`CREATE VECTOR INDEX ON documents(embedding) STORING (category);`
99+
"""
100+
101+
tree_depth: int = 2
102+
"""Optional. The tree depth (level). This value can be either 2 or 3. Defaults to 2.
103+
104+
A tree with 2 levels only has leaves (num_leaves) as nodes.
105+
If the dataset has more than 100 million rows,
106+
then you can use a tree with 3 levels and add branches (num_branches) to
107+
further partition the dataset.
108+
"""
109+
110+
num_leaves: int = 1000
111+
"""Optional. The number of leaves (i.e. potential partitions) for the vector data. Defaults to 1000.
112+
113+
You can designate num_leaves for trees with 2 or 3 levels.
114+
We recommend that the number of leaves is number_of_rows_in_dataset/1000.
115+
"""
116+
117+
num_branches: Optional[int] = None
118+
"""Optional. The number of branches to further partition the vector data.
119+
120+
You can only designate num_branches for trees with 3 levels.
121+
The number of branches must be fewer than the number of leaves.
122+
We recommend that the number of leaves is between 1000 and sqrt(number_of_rows_in_dataset).
123+
"""
124+
125+
58126
class SpannerVectorStoreSettings(BaseModel):
59127
"""Settings for Spanner Vector Store.
60128
@@ -86,27 +154,28 @@ class SpannerVectorStoreSettings(BaseModel):
86154

87155
vertex_ai_embedding_model_name: str
88156
"""Required. The Vertex AI embedding model name, which is used to generate embeddings for vector store and vector similarity search.
89-
For example, 'text-embedding-005'.
90157
91-
Note: the output dimensionality of the embedding model should be the same as the value specified in the `vector_length` field.
92-
Otherwise, a runtime error might be raised during a query.
158+
For example, 'text-embedding-005'.
159+
160+
Note: the output dimensionality of the embedding model should be the same as the value specified in the `vector_length` field.
161+
Otherwise, a runtime error might be raised during a query.
93162
"""
94163

95-
selected_columns: List[str] = []
164+
selected_columns: list[str] = []
96165
"""Optional. The vector store table columns to return in the vector similarity search result.
97166
98-
By default, only the `content_column` value and the distance value are returned.
99-
If sepecified, the list of selected columns and the distance value are returned.
100-
For example, if `selected_columns` is ['col1', 'col2'], then the result will contain the values of 'col1' and 'col2' columns and the distance value.
167+
By default, only the `content_column` value and the distance value are returned.
168+
If specified, the list of selected columns and the distance value are returned.
169+
For example, if `selected_columns` is ['col1', 'col2'], then the result will contain the values of 'col1' and 'col2' columns and the distance value.
101170
"""
102171

103172
nearest_neighbors_algorithm: NearestNeighborsAlgorithm = (
104173
"EXACT_NEAREST_NEIGHBORS"
105174
)
106175
"""The algorithm used to perform vector similarity search. This value can be EXACT_NEAREST_NEIGHBORS or APPROXIMATE_NEAREST_NEIGHBORS.
107176
108-
For more details about EXACT_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-k-nearest-neighbors
109-
For more details about APPROXIMATE_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-approximate-nearest-neighbors
177+
For more details about EXACT_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-k-nearest-neighbors
178+
For more details about APPROXIMATE_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-approximate-nearest-neighbors
110179
"""
111180

112181
top_k: int = 4
@@ -118,16 +187,41 @@ class SpannerVectorStoreSettings(BaseModel):
118187
num_leaves_to_search: Optional[int] = None
119188
"""Optional. This option specifies how many leaf nodes of the index are searched.
120189
121-
Note: this option is only used when the nearest neighbors search algorithm (`nearest_neighbors_algorithm`) is APPROXIMATE_NEAREST_NEIGHBORS.
122-
For more details, see https://docs.cloud.google.com/spanner/docs/vector-index-best-practices
190+
Note: This option is only used when the nearest neighbors search algorithm (`nearest_neighbors_algorithm`) is APPROXIMATE_NEAREST_NEIGHBORS.
191+
For more details, see https://docs.cloud.google.com/spanner/docs/vector-index-best-practices
123192
"""
124193

125194
additional_filter: Optional[str] = None
126195
"""Optional. An optional filter to apply to the search query. If provided, this will be added to the WHERE clause of the final query."""
127196

197+
vector_search_index_settings: Optional[VectorSearchIndexSettings] = None
198+
"""Optional. Settings for the index for use with Approximate Nearest Neighbor (ANN) in the vector store.
199+
200+
Note: This option is only required when the nearest neighbors search algorithm (`nearest_neighbors_algorithm`) is APPROXIMATE_NEAREST_NEIGHBORS.
201+
For more details, see https://docs.cloud.google.com/spanner/docs/vector-indexes
202+
"""
203+
204+
additional_columns_to_setup: Optional[list[TableColumn]] = None
205+
"""Optional. A list of supplemental columns to be created when initializing a new vector store table or inserting content rows.
206+
207+
Note: This configuration is only utilized during the initial table setup
208+
or when inserting content rows.
209+
"""
210+
211+
primary_key_columns: Optional[list[str]] = None
212+
"""Optional. Specifies the column names to be used as the primary key for a new vector store table.
213+
214+
If provided, every column name listed here must be `content_column`, `embedding_column`, or a column defined within
215+
`additional_columns_to_setup`. If this field is omitted (set to `None`),
216+
defaults to a single primary key column named `id` which automatically
217+
generates UUIDs for each entry.
218+
219+
Note: This field is only used during the creation phase of a new vector store.
220+
"""
221+
128222
@model_validator(mode="after")
129223
def __post_init__(self):
130-
"""Validate the embedding settings."""
224+
"""Validate the vector store settings."""
131225
if not self.vector_length or self.vector_length <= 0:
132226
raise ValueError(
133227
"Invalid vector length in the Spanner vector store settings."
@@ -136,6 +230,17 @@ def __post_init__(self):
136230
if not self.selected_columns:
137231
self.selected_columns = [self.content_column]
138232

233+
if self.primary_key_columns:
234+
cols = {self.content_column, self.embedding_column}
235+
if self.additional_columns_to_setup:
236+
cols.update({c.name for c in self.additional_columns_to_setup})
237+
238+
for pk in self.primary_key_columns:
239+
if pk not in cols:
240+
raise ValueError(
241+
f"Primary key column '{pk}' not found in column definitions."
242+
)
243+
139244
return self
140245

141246

0 commit comments

Comments
 (0)