@@ -55,6 +55,74 @@ class QueryResultMode(Enum):
5555 """
5656
5757
class TableColumn(BaseModel):
    """Describes a single column of a vector store table.

    Instances are used as part of the create DDL statement when a new vector
    store table is set up.
    """

    name: str
    """Required. Column name."""

    type: str
    """Required. Column type, written for the SQL dialect in use.

    Examples:

    - GoogleSQL: 'STRING(MAX)', 'INT64', 'FLOAT64', 'BOOL', etc.
    - PostgreSQL: 'text', 'int8', 'float8', 'boolean', etc.
    """

    is_nullable: bool = True
    """Optional. Whether the column is nullable. Defaults to True (nullable)."""
75+
76+
class VectorSearchIndexSettings(BaseModel):
    """Settings for the index used with Approximate Nearest Neighbor (ANN) vector similarity search.

    Only `index_name` is required; every other field has a default.
    """

    # NOTE(review): the value constraints described in the field docstrings
    # below (tree_depth in {2, 3}, num_branches only with 3 levels and fewer
    # than num_leaves) are documentation only; this model does not enforce them.

    index_name: str
    """Required. The name of the vector similarity search index."""

    additional_key_columns: Optional[list[str]] = None
    """Optional. The list of the additional key column names in the vector similarity search index.

    To further speed up filtering for highly selective filtering columns, organize
    them as additional keys in the vector index after the embedding column.
    For example: `category` as additional key column.
    `CREATE VECTOR INDEX ON documents(embedding, category);`
    """

    additional_storing_columns: Optional[list[str]] = None
    """Optional. The list of the storing column names in the vector similarity search index.

    This enables filtering while walking the vector index, removing unqualified
    rows early.
    For example: `category` as storing column.
    `CREATE VECTOR INDEX ON documents(embedding) STORING (category);`
    """

    tree_depth: int = 2
    """Optional. The tree depth (level). This value can be either 2 or 3. Defaults to 2.

    A tree with 2 levels only has leaves (num_leaves) as nodes.
    If the dataset has more than 100 million rows,
    then you can use a tree with 3 levels and add branches (num_branches) to
    further partition the dataset.
    """

    num_leaves: int = 1000
    """Optional. The number of leaves (i.e. potential partitions) for the vector data. Defaults to 1000.

    You can designate num_leaves for trees with 2 or 3 levels.
    We recommend that the number of leaves is number_of_rows_in_dataset/1000.
    """

    num_branches: Optional[int] = None
    """Optional. The number of branches to further partition the vector data.

    You can only designate num_branches for trees with 3 levels.
    The number of branches must be fewer than the number of leaves.
    We recommend that the number of branches is between 1000 and sqrt(number_of_rows_in_dataset).
    """
124+
125+
58126class SpannerVectorStoreSettings (BaseModel ):
59127 """Settings for Spanner Vector Store.
60128
@@ -86,27 +154,28 @@ class SpannerVectorStoreSettings(BaseModel):
86154
87155 vertex_ai_embedding_model_name : str
88156 """Required. The Vertex AI embedding model name, which is used to generate embeddings for vector store and vector similarity search.
89- For example, 'text-embedding-005'.
90157
91- Note: the output dimensionality of the embedding model should be the same as the value specified in the `vector_length` field.
92- Otherwise, a runtime error might be raised during a query.
158+ For example, 'text-embedding-005'.
159+
160+ Note: the output dimensionality of the embedding model should be the same as the value specified in the `vector_length` field.
161+ Otherwise, a runtime error might be raised during a query.
93162 """
94163
95- selected_columns : List [str ] = []
164+ selected_columns : list [str ] = []
96165 """Optional. The vector store table columns to return in the vector similarity search result.
97166
98- By default, only the `content_column` value and the distance value are returned.
99- If sepecified, the list of selected columns and the distance value are returned.
100- For example, if `selected_columns` is ['col1', 'col2'], then the result will contain the values of 'col1' and 'col2' columns and the distance value.
167+ By default, only the `content_column` value and the distance value are returned.
168+ If specified, the list of selected columns and the distance value are returned.
169+ For example, if `selected_columns` is ['col1', 'col2'], then the result will contain the values of 'col1' and 'col2' columns and the distance value.
101170 """
102171
103172 nearest_neighbors_algorithm : NearestNeighborsAlgorithm = (
104173 "EXACT_NEAREST_NEIGHBORS"
105174 )
106175 """The algorithm used to perform vector similarity search. This value can be EXACT_NEAREST_NEIGHBORS or APPROXIMATE_NEAREST_NEIGHBORS.
107176
108- For more details about EXACT_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-k-nearest-neighbors
109- For more details about APPROXIMATE_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-approximate-nearest-neighbors
177+ For more details about EXACT_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-k-nearest-neighbors
178+ For more details about APPROXIMATE_NEAREST_NEIGHBORS, see https://docs.cloud.google.com/spanner/docs/find-approximate-nearest-neighbors
110179 """
111180
112181 top_k : int = 4
@@ -118,16 +187,41 @@ class SpannerVectorStoreSettings(BaseModel):
118187 num_leaves_to_search : Optional [int ] = None
119188 """Optional. This option specifies how many leaf nodes of the index are searched.
120189
121- Note: this option is only used when the nearest neighbors search algorithm (`nearest_neighbors_algorithm`) is APPROXIMATE_NEAREST_NEIGHBORS.
122- For more details, see https://docs.cloud.google.com/spanner/docs/vector-index-best-practices
190+ Note: This option is only used when the nearest neighbors search algorithm (`nearest_neighbors_algorithm`) is APPROXIMATE_NEAREST_NEIGHBORS.
191+ For more details, see https://docs.cloud.google.com/spanner/docs/vector-index-best-practices
123192 """
124193
125194 additional_filter : Optional [str ] = None
126195 """Optional. An optional filter to apply to the search query. If provided, this will be added to the WHERE clause of the final query."""
127196
197+ vector_search_index_settings : Optional [VectorSearchIndexSettings ] = None
198+ """Optional. Settings for the index for use with Approximate Nearest Neighbor (ANN) in the vector store.
199+
200+ Note: This option is only required when the nearest neighbors search algorithm (`nearest_neighbors_algorithm`) is APPROXIMATE_NEAREST_NEIGHBORS.
201+ For more details, see https://docs.cloud.google.com/spanner/docs/vector-indexes
202+ """
203+
204+ additional_columns_to_setup : Optional [list [TableColumn ]] = None
205+ """Optional. A list of supplemental columns to be created when initializing a new vector store table or inserting content rows.
206+
207+ Note: This configuration is only utilized during the initial table setup
208+ or when inserting content rows.
209+ """
210+
211+ primary_key_columns : Optional [list [str ]] = None
212+ """Optional. Specifies the column names to be used as the primary key for a new vector store table.
213+
214+ If provided, every column name listed here must be the content column, the
215+ embedding column, or a column defined in `additional_columns_to_setup`.
216+ If this field is omitted (set to `None`), defaults to a single primary key
217+ column named `id` which automatically generates UUIDs for each entry.
218+
219+ Note: This field is only used during the creation phase of a new vector store.
220+ """
221+
128222 @model_validator (mode = "after" )
129223 def __post_init__ (self ):
130- """Validate the embedding settings."""
224+ """Validate the vector store settings."""
131225 if not self .vector_length or self .vector_length <= 0 :
132226 raise ValueError (
133227 "Invalid vector length in the Spanner vector store settings."
@@ -136,6 +230,17 @@ def __post_init__(self):
136230 if not self .selected_columns :
137231 self .selected_columns = [self .content_column ]
138232
233+ if self .primary_key_columns :
234+ cols = {self .content_column , self .embedding_column }
235+ if self .additional_columns_to_setup :
236+ cols .update ({c .name for c in self .additional_columns_to_setup })
237+
238+ for pk in self .primary_key_columns :
239+ if pk not in cols :
240+ raise ValueError (
241+ f"Primary key column '{ pk } ' not found in column definitions."
242+ )
243+
139244 return self
140245
141246
0 commit comments