Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions acceptance/bundle/refschema/out.fields.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2974,6 +2974,74 @@ resources.synced_database_tables.*.spec.source_table_full_name string ALL
resources.synced_database_tables.*.spec.timeseries_key string ALL
resources.synced_database_tables.*.unity_catalog_provisioning_state database.ProvisioningInfoState ALL
resources.synced_database_tables.*.url string INPUT
resources.vector_search_endpoints.*.budget_policy_id string INPUT STATE
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we merge budget_policy_id and effective_budget_policy_id so that we can detect drift there? Or are there cases where effective_budget_policy_id differs because of some external policy?

resources.vector_search_endpoints.*.creation_timestamp int64 REMOTE
resources.vector_search_endpoints.*.creator string REMOTE
resources.vector_search_endpoints.*.custom_tags []vectorsearch.CustomTag REMOTE
resources.vector_search_endpoints.*.custom_tags[*] vectorsearch.CustomTag REMOTE
resources.vector_search_endpoints.*.custom_tags[*].key string REMOTE
resources.vector_search_endpoints.*.custom_tags[*].value string REMOTE
resources.vector_search_endpoints.*.effective_budget_policy_id string REMOTE
resources.vector_search_endpoints.*.endpoint_status *vectorsearch.EndpointStatus REMOTE
resources.vector_search_endpoints.*.endpoint_status.message string REMOTE
resources.vector_search_endpoints.*.endpoint_status.state vectorsearch.EndpointStatusState REMOTE
resources.vector_search_endpoints.*.endpoint_type vectorsearch.EndpointType ALL
resources.vector_search_endpoints.*.id string INPUT REMOTE
resources.vector_search_endpoints.*.last_updated_timestamp int64 REMOTE
resources.vector_search_endpoints.*.last_updated_user string REMOTE
resources.vector_search_endpoints.*.lifecycle resources.Lifecycle INPUT
resources.vector_search_endpoints.*.lifecycle.prevent_destroy bool INPUT
resources.vector_search_endpoints.*.min_qps int64 INPUT STATE
resources.vector_search_endpoints.*.modified_status string INPUT
resources.vector_search_endpoints.*.name string ALL
resources.vector_search_endpoints.*.num_indexes int REMOTE
resources.vector_search_endpoints.*.scaling_info *vectorsearch.EndpointScalingInfo REMOTE
resources.vector_search_endpoints.*.scaling_info.requested_min_qps int64 REMOTE
resources.vector_search_endpoints.*.scaling_info.state vectorsearch.ScalingChangeState REMOTE
resources.vector_search_endpoints.*.url string INPUT
resources.vector_search_indexes.*.creator string REMOTE
resources.vector_search_indexes.*.delta_sync_index_spec *vectorsearch.DeltaSyncVectorIndexSpecRequest INPUT STATE
resources.vector_search_indexes.*.delta_sync_index_spec *vectorsearch.DeltaSyncVectorIndexSpecResponse REMOTE
resources.vector_search_indexes.*.delta_sync_index_spec.columns_to_sync []string INPUT STATE
resources.vector_search_indexes.*.delta_sync_index_spec.columns_to_sync[*] string INPUT STATE
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_source_columns []vectorsearch.EmbeddingSourceColumn ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_source_columns[*] vectorsearch.EmbeddingSourceColumn ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_source_columns[*].embedding_model_endpoint_name string ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_source_columns[*].model_endpoint_name_for_query string ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_source_columns[*].name string ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_vector_columns []vectorsearch.EmbeddingVectorColumn ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_vector_columns[*] vectorsearch.EmbeddingVectorColumn ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_vector_columns[*].embedding_dimension int ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_vector_columns[*].name string ALL
resources.vector_search_indexes.*.delta_sync_index_spec.embedding_writeback_table string ALL
resources.vector_search_indexes.*.delta_sync_index_spec.pipeline_id string REMOTE
resources.vector_search_indexes.*.delta_sync_index_spec.pipeline_type vectorsearch.PipelineType ALL
resources.vector_search_indexes.*.delta_sync_index_spec.source_table string ALL
resources.vector_search_indexes.*.direct_access_index_spec *vectorsearch.DirectAccessVectorIndexSpec ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_source_columns []vectorsearch.EmbeddingSourceColumn ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_source_columns[*] vectorsearch.EmbeddingSourceColumn ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_source_columns[*].embedding_model_endpoint_name string ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_source_columns[*].model_endpoint_name_for_query string ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_source_columns[*].name string ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_vector_columns []vectorsearch.EmbeddingVectorColumn ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_vector_columns[*] vectorsearch.EmbeddingVectorColumn ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_vector_columns[*].embedding_dimension int ALL
resources.vector_search_indexes.*.direct_access_index_spec.embedding_vector_columns[*].name string ALL
resources.vector_search_indexes.*.direct_access_index_spec.schema_json string ALL
resources.vector_search_indexes.*.endpoint_name string ALL
resources.vector_search_indexes.*.id string INPUT
resources.vector_search_indexes.*.index_type vectorsearch.VectorIndexType ALL
resources.vector_search_indexes.*.lifecycle resources.Lifecycle INPUT
resources.vector_search_indexes.*.lifecycle.prevent_destroy bool INPUT
resources.vector_search_indexes.*.modified_status string INPUT
resources.vector_search_indexes.*.name string ALL
resources.vector_search_indexes.*.primary_key string ALL
resources.vector_search_indexes.*.status *vectorsearch.VectorIndexStatus REMOTE
resources.vector_search_indexes.*.status.index_url string REMOTE
resources.vector_search_indexes.*.status.indexed_row_count int64 REMOTE
resources.vector_search_indexes.*.status.message string REMOTE
resources.vector_search_indexes.*.status.ready bool REMOTE
resources.vector_search_indexes.*.url string INPUT
resources.volumes.*.access_point string REMOTE
resources.volumes.*.browse_only bool REMOTE
resources.volumes.*.catalog_name string ALL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
bundle:
name: vector-search-$UNIQUE_NAME

workspace:
root_path: ~/.bundle/$UNIQUE_NAME

resources:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a follow-up, please add support for permissions on these resources.

vector_search_endpoints:
test_endpoint:
name: $ENDPOINT_NAME
endpoint_type: STANDARD
vector_search_indexes:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would assume customers want to add JSON data to the index, so we likely need to implement and test this:

inputs_json

test_index:
name: $INDEX_NAME
endpoint_name: ${resources.vector_search_endpoints.test_endpoint.name}
primary_key: my_pkey
index_type: DELTA_SYNC
delta_sync_index_spec:
source_table: my_table
pipeline_type: TRIGGERED

targets:
development:
default: true

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

=== Deploy bundle with vector search endpoint and index (index depends on endpoint)
>>> [CLI] bundle plan
create vector_search_endpoints.test_endpoint
create vector_search_indexes.test_index

Plan: 2 to add, 0 to change, 0 to delete, 0 unchanged

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]/files...
Deploying resources...
Updating deployment state...
Deployment complete!

=== Assert the endpoint is created
>>> [CLI] vector-search-endpoints get-endpoint test-vector-search-endpoint-[UNIQUE_NAME]
{
"name": "test-vector-search-endpoint-[UNIQUE_NAME]",
"endpoint_type": "STANDARD"
}

=== Assert the index is created
>>> [CLI] vector-search-indexes get-index workspace.default.test_vector_search_index_[UNIQUE_NAME]
{
"name": "workspace.default.test_vector_search_index_[UNIQUE_NAME]",
"endpoint_name": "test-vector-search-endpoint-[UNIQUE_NAME]",
"index_type": "DELTA_SYNC"
}

=== Test cleanup
>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.vector_search_endpoints.test_endpoint
delete resources.vector_search_indexes.test_index

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/[UNIQUE_NAME]

Deleting files...
Destroy complete!

=== Assert the endpoint is deleted
=== Assert the index is deleted
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Acceptance script: deploys a bundle with a vector search endpoint and an
# index that references it, checks that both exist, and destroys the bundle on
# exit.
# NOTE(review): title, trace, errcode, $CLI and $UNIQUE_NAME are not defined in
# this script; presumably the acceptance test harness provides them.

# Build per-run resource names and export them so envsubst can substitute them
# into the bundle template.
export ENDPOINT_NAME="test-vector-search-endpoint-${UNIQUE_NAME}"
export INDEX_NAME="workspace.default.test_vector_search_index_${UNIQUE_NAME}"
# Render databricks.yml from the template using the current environment.
envsubst < databricks.yml.tmpl > databricks.yml

# cleanup destroys the bundle and then looks both resources up again.
# It is registered as the EXIT trap below, so it runs whenever the script
# exits, including after a failed step.
cleanup() {
title "Test cleanup"
trace $CLI bundle destroy --auto-approve

# stderr of the whole traced lookup is discarded in the two steps below.
# NOTE(review): the recorded output contains only the two title lines for
# these steps, so nothing about the lookup result is captured. Confirm whether
# deletion is really being asserted here.
title "Assert the endpoint is deleted"
trace errcode $CLI vector-search-endpoints get-endpoint "${ENDPOINT_NAME}" 2>/dev/null
title "Assert the index is deleted"
trace errcode $CLI vector-search-indexes get-index "${INDEX_NAME}" 2>/dev/null
}
trap cleanup EXIT

# Plan first so the expected create actions are recorded, then deploy.
title "Deploy bundle with vector search endpoint and index (index depends on endpoint)"
trace $CLI bundle plan
trace $CLI bundle deploy

# Only a stable subset of fields is kept with jq for each lookup.
title "Assert the endpoint is created"
trace $CLI vector-search-endpoints get-endpoint "${ENDPOINT_NAME}" | jq "{name, endpoint_type}"

title "Assert the index is created"
trace $CLI vector-search-indexes get-index "${INDEX_NAME}" | jq "{name, endpoint_name, index_type}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Tests endpoint + index with interdependence (index depends on endpoint).

Local = true
Cloud = true
RecordRequests = false

Ignore = [
".databricks",
"databricks.yml",
]

[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["direct"]
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ var unsupportedResources = []string{
"synced_database_tables",
"postgres_branches",
"postgres_endpoints",
"vector_search_endpoints",
"vector_search_indexes",
}

func TestApplyBundlePermissions(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/databricks/databricks-sdk-go/service/postgres"
"github.com/databricks/databricks-sdk-go/service/serving"
"github.com/databricks/databricks-sdk-go/service/sql"
"github.com/databricks/databricks-sdk-go/service/vectorsearch"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -246,6 +247,12 @@ func mockBundle(mode config.Mode) *bundle.Bundle {
},
},
},
VectorSearchEndpoints: map[string]*resources.VectorSearchEndpoint{
"vs_endpoint1": {CreateEndpoint: vectorsearch.CreateEndpoint{Name: "vs_endpoint1"}},
},
VectorSearchIndexes: map[string]*resources.VectorSearchIndex{
"vs_index1": {CreateVectorIndexRequest: vectorsearch.CreateVectorIndexRequest{Name: "vs_index1", EndpointName: "vs_endpoint1"}},
},
},
},
SyncRoot: vfs.MustNew("/Users/lennart.kats@databricks.com"),
Expand Down Expand Up @@ -407,12 +414,14 @@ func TestAllNonUcResourcesAreRenamed(t *testing.T) {
b := mockBundle(config.Development)

// UC resources should not have a prefix added to their name. Right now
// this list only contains the Volume, Catalog, and ExternalLocation resources since we have yet to remove
// this list only contains the Volume, Catalog, ExternalLocation, and Vector Search resources since we have yet to remove
// prefixing support for UC schemas and registered models.
ucFields := []reflect.Type{
reflect.TypeOf(&resources.Catalog{}),
reflect.TypeOf(&resources.ExternalLocation{}),
reflect.TypeOf(&resources.Volume{}),
reflect.TypeOf(&resources.VectorSearchEndpoint{}),
reflect.TypeOf(&resources.VectorSearchIndex{}),
}

diags := bundle.ApplySeq(t.Context(), b, ApplyTargetMode(), ApplyPresets())
Expand Down
4 changes: 4 additions & 0 deletions bundle/config/mutator/resourcemutator/run_as_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func allResourceTypes(t *testing.T) []string {
"secret_scopes",
"sql_warehouses",
"synced_database_tables",
"vector_search_endpoints",
"vector_search_indexes",
"volumes",
},
resourceTypes,
Expand Down Expand Up @@ -179,6 +181,8 @@ var allowList = []string{
"schemas",
"secret_scopes",
"sql_warehouses",
"vector_search_endpoints",
"vector_search_indexes",
"volumes",
}

Expand Down
24 changes: 24 additions & 0 deletions bundle/config/mutator/validate_direct_only_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,30 @@ var directOnlyResources = []directOnlyResource{
return result
},
},
{
resourceType: "vector_search_endpoints",
pluralName: "Vector Search Endpoint",
singularName: "vector search endpoint",
getResources: func(b *bundle.Bundle) map[string]any {
result := make(map[string]any)
for k, v := range b.Config.Resources.VectorSearchEndpoints {
result[k] = v
}
return result
},
},
{
resourceType: "vector_search_indexes",
pluralName: "Vector Search Index",
singularName: "vector search index",
getResources: func(b *bundle.Bundle) map[string]any {
result := make(map[string]any)
for k, v := range b.Config.Resources.VectorSearchIndexes {
result[k] = v
}
return result
},
},
}

type validateDirectOnlyResources struct {
Expand Down
6 changes: 6 additions & 0 deletions bundle/config/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ type Resources struct {
PostgresProjects map[string]*resources.PostgresProject `json:"postgres_projects,omitempty"`
PostgresBranches map[string]*resources.PostgresBranch `json:"postgres_branches,omitempty"`
PostgresEndpoints map[string]*resources.PostgresEndpoint `json:"postgres_endpoints,omitempty"`
VectorSearchEndpoints map[string]*resources.VectorSearchEndpoint `json:"vector_search_endpoints,omitempty"`
VectorSearchIndexes map[string]*resources.VectorSearchIndex `json:"vector_search_indexes,omitempty"`
}

type ConfigResource interface {
Expand Down Expand Up @@ -108,6 +110,8 @@ func (r *Resources) AllResources() []ResourceGroup {
collectResourceMap(descriptions["postgres_projects"], r.PostgresProjects),
collectResourceMap(descriptions["postgres_branches"], r.PostgresBranches),
collectResourceMap(descriptions["postgres_endpoints"], r.PostgresEndpoints),
collectResourceMap(descriptions["vector_search_endpoints"], r.VectorSearchEndpoints),
collectResourceMap(descriptions["vector_search_indexes"], r.VectorSearchIndexes),
}
}

Expand Down Expand Up @@ -162,5 +166,7 @@ func SupportedResources() map[string]resources.ResourceDescription {
"postgres_projects": (&resources.PostgresProject{}).ResourceDescription(),
"postgres_branches": (&resources.PostgresBranch{}).ResourceDescription(),
"postgres_endpoints": (&resources.PostgresEndpoint{}).ResourceDescription(),
"vector_search_endpoints": (&resources.VectorSearchEndpoint{}).ResourceDescription(),
"vector_search_indexes": (&resources.VectorSearchIndex{}).ResourceDescription(),
}
}
64 changes: 64 additions & 0 deletions bundle/config/resources/vector_search_endpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package resources

import (
"context"
"net/url"

"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/marshal"
"github.com/databricks/databricks-sdk-go/service/vectorsearch"
)

// VectorSearchEndpoint is the bundle configuration resource for a vector
// search endpoint. It embeds BaseResource together with the SDK's
// vectorsearch.CreateEndpoint request, so the fields of both are promoted
// onto this type.
type VectorSearchEndpoint struct {
BaseResource
vectorsearch.CreateEndpoint
}

// UnmarshalJSON decodes b into e by delegating to the SDK's marshal.Unmarshal
// helper. It returns whatever error that helper reports.
func (e *VectorSearchEndpoint) UnmarshalJSON(b []byte) error {
return marshal.Unmarshal(b, e)
}

// MarshalJSON encodes e by delegating to the SDK's marshal.Marshal helper.
// Unlike the other methods on this type it is declared on a value receiver.
func (e VectorSearchEndpoint) MarshalJSON() ([]byte, error) {
return marshal.Marshal(e)
}

// Exists reports whether a vector search endpoint called name can be fetched
// through the workspace client w.
//
// An error classified as "missing" by apierr.IsMissing is translated into
// (false, nil) and noted at debug level. Any other error is returned to the
// caller unchanged and is not logged here: such an error does not establish
// that the endpoint is absent, and the caller is the one place that handles it.
func (e *VectorSearchEndpoint) Exists(ctx context.Context, w *databricks.WorkspaceClient, name string) (bool, error) {
	_, err := w.VectorSearchEndpoints.GetEndpoint(ctx, vectorsearch.GetEndpointRequest{
		EndpointName: name,
	})
	switch {
	case err == nil:
		return true, nil
	case apierr.IsMissing(err):
		log.Debugf(ctx, "vector search endpoint %s does not exist: %v", name, err)
		return false, nil
	default:
		return false, err
	}
}

// ResourceDescription returns the singular and plural identifiers and the
// human-readable titles used for vector search endpoint resources.
func (*VectorSearchEndpoint) ResourceDescription() ResourceDescription {
	var desc ResourceDescription

	// Machine-oriented names.
	desc.SingularName = "vector_search_endpoint"
	desc.PluralName = "vector_search_endpoints"

	// Display titles.
	desc.SingularTitle = "Vector Search Endpoint"
	desc.PluralTitle = "Vector Search Endpoints"

	return desc
}

// InitializeURL sets the resource's URL to the vector search page for this
// endpoint underneath baseURL. It does nothing when the resource has no ID.
//
// baseURL is received by value, so assigning its Path here does not change
// the caller's URL.
func (e *VectorSearchEndpoint) InitializeURL(baseURL url.URL) {
	if e.ID != "" {
		baseURL.Path = "explore/vector-search/" + e.ID
		e.URL = baseURL.String()
	}
}

// GetURL returns the URL currently stored on the resource. InitializeURL
// populates it when the resource has an ID.
func (e *VectorSearchEndpoint) GetURL() string {
return e.URL
}

// GetName returns the endpoint name configured on the resource.
func (e *VectorSearchEndpoint) GetName() string {
return e.Name
}
Loading
Loading