From 053aa5d10098cd67d9d925157342bc193f907013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Fri, 25 Oct 2024 19:21:18 +0200 Subject: [PATCH] breaking: Automatically create AI Search index The default semantic configuration name changed from "default" to "semantic-default"; this can break queries against existing indexes that only define "default". --- README.md | 11 +- app/helpers/config_models/ai_search.py | 6 +- app/models/training.py | 1 + app/persistence/ai_search.py | 147 +++++++++++++++++++++++-- cicd/bicep/app.bicep | 30 +++++ cicd/bicep/main.bicep | 8 +- 6 files changed, 183 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 97a8feee..602da9c1 100644 --- a/README.md +++ b/README.md @@ -357,12 +357,7 @@ make deploy name=my-rg-name - Wait for the deployment to finish -#### 4. [Create a AI Search resource](https://learn.microsoft.com/en-us/azure/search/search-create-service-portal) - -- An index named `trainings` -- A semantic search configuration on the index named `default` - -#### 5. Get the logs +#### 4. Get the logs ```zsh make logs name=my-rg-name @@ -430,6 +425,10 @@ llm: streaming: true ai_search: + embedding_deployment: text-embedding-3-large-1 + embedding_dimensions: 3072 + embedding_endpoint: https://xxx.openai.azure.com + embedding_model: text-embedding-3-large endpoint: https://xxx.search.windows.net index: trainings diff --git a/app/helpers/config_models/ai_search.py b/app/helpers/config_models/ai_search.py index 2ee9a6a8..b4ba08a6 100644 --- a/app/helpers/config_models/ai_search.py +++ b/app/helpers/config_models/ai_search.py @@ -6,10 +6,14 @@ class AiSearchModel(BaseModel, frozen=True): + embedding_deployment: str + embedding_dimensions: int + embedding_endpoint: str + embedding_model: str endpoint: str expansion_n_messages: int = Field(default=10, ge=1) index: str - semantic_configuration: str = "default" + semantic_configuration: str = "semantic-default" strictness: float = Field(default=2, ge=0, le=5) top_n_documents: int = Field(default=5, ge=1) diff --git a/app/models/training.py 
b/app/models/training.py index 29a0a274..2352a89e 100644 --- a/app/models/training.py +++ b/app/models/training.py @@ -13,6 +13,7 @@ class TrainingModel(BaseModel, frozen=True): content: str id: UUID score: float + title: str def __hash__(self) -> int: return self.id.__hash__() diff --git a/app/persistence/ai_search.py b/app/persistence/ai_search.py index 83b6fb6a..1b5879b9 100644 --- a/app/persistence/ai_search.py +++ b/app/persistence/ai_search.py @@ -1,10 +1,29 @@ from azure.core.exceptions import ( HttpResponseError, + ResourceExistsError, ResourceNotFoundError, ServiceRequestError, ServiceResponseError, ) from azure.search.documents.aio import SearchClient +from azure.search.documents.indexes.aio import SearchIndexClient +from azure.search.documents.indexes.models import ( + AzureOpenAIParameters, + AzureOpenAIVectorizer, + HnswAlgorithmConfiguration, + LexicalAnalyzerName, + SearchableField, + SearchField, + SearchFieldDataType, + SearchIndex, + SemanticConfiguration, + SemanticField, + SemanticPrioritizedFields, + SemanticSearch, + SimpleField, + VectorSearch, + VectorSearchProfile, +) from azure.search.documents.models import ( HybridCountAndFacetMode, HybridSearch, @@ -156,14 +175,122 @@ async def training_asearch_all( return trainings or None async def _use_client(self) -> SearchClient: - if not self._client: - self._client = SearchClient( - # Deployment - endpoint=self._config.endpoint, - index_name=self._config.index, - # Performance - transport=await azure_transport(), - # Authentication - credential=await credential(), - ) + """ + Get the search client. + + If the index does not exist, it will be created. 
+ """ + if self._client: + return self._client + + # Index configuration + fields = [ + # Required field for indexing key + SimpleField( + name="id", + key=True, + type=SearchFieldDataType.String, + ), + # Custom fields + SearchableField( + analyzer_name=LexicalAnalyzerName.STANDARD_LUCENE, + name="content", + type=SearchFieldDataType.String, + ), + SearchableField( + analyzer_name=LexicalAnalyzerName.STANDARD_LUCENE, + name="title", + type=SearchFieldDataType.String, + ), + SearchField( + name="vectors", + searchable=True, + type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + vector_search_dimensions=self._config.embedding_dimensions, + vector_search_profile_name="profile-default", + ), + ] + vector_search = VectorSearch( + profiles=[ + VectorSearchProfile( + algorithm_configuration_name="algorithm-default", + name="profile-default", + vectorizer="vectorizer-default", + ), + ], + algorithms=[ + HnswAlgorithmConfiguration( + name="algorithm-default", + ), + ], + vectorizers=[ + AzureOpenAIVectorizer( + name="vectorizer-default", + # Without credentials specified, the database will use its system managed identity + azure_open_ai_parameters=AzureOpenAIParameters( + deployment_id=self._config.embedding_deployment, + model_name=self._config.embedding_model, + resource_uri=self._config.embedding_endpoint, + ), + ) + ], + ) + semantic_search = SemanticSearch( + default_configuration_name=self._config.semantic_configuration, + configurations=[ + SemanticConfiguration( + name=self._config.semantic_configuration, + prioritized_fields=SemanticPrioritizedFields( + title_field=SemanticField( + field_name="title", + ), + content_fields=[ + SemanticField( + field_name="content", + ), + ], + ), + ), + ], + ) + + # Create index if it does not exist + async with SearchIndexClient( + # Deployment + endpoint=self._config.endpoint, + index_name=self._config.index, + # Index configuration + fields=fields, + semantic_search=semantic_search, + vector_search=vector_search, 
+ # Performance + transport=await azure_transport(), + # Authentication + credential=await credential(), + ) as client: + try: + await client.create_index( + SearchIndex( + fields=fields, + name=self._config.index, + vector_search=vector_search, + ) + ) + logger.info('Created Search "%s"', self._config.index) + except ResourceExistsError: + pass + except HttpResponseError as e: + if not e.error or not e.error.code == "ResourceNameAlreadyInUse": + raise e + + # Return client + self._client = SearchClient( + # Deployment + endpoint=self._config.endpoint, + index_name=self._config.index, + # Performance + transport=await azure_transport(), + # Authentication + credential=await credential(), + ) return self._client diff --git a/cicd/bicep/app.bicep b/cicd/bicep/app.bicep index ae3a8eca..bb34bff2 100644 --- a/cicd/bicep/app.bicep +++ b/cicd/bicep/app.bicep @@ -1,5 +1,6 @@ param cognitiveCommunicationLocation string param embeddingDeploymentType string +param embeddingDimensions int param embeddingModel string param embeddingQuota int param embeddingVersion string @@ -91,6 +92,10 @@ var config = { } } ai_search: { + embedding_deployment: embedding.name + embedding_dimensions: embeddingDimensions + embedding_endpoint: cognitiveOpenai.properties.endpoint + embedding_model: embeddingModel endpoint: 'https://${search.name}.search.windows.net' index: 'trainings' } @@ -494,6 +499,16 @@ resource assignmentsContainerAppOpenaiContributor 'Microsoft.Authorization/roleA } } +resource assignmentSearchOpenaiContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(subscription().id, prefix, cognitiveOpenai.name, 'assignmentSearchOpenaiContributor') + scope: cognitiveOpenai + properties: { + principalId: search.identity.principalId + principalType: 'ServicePrincipal' + roleDefinitionId: roleOpenaiContributor.id + } +} + resource cognitiveOpenai 'Microsoft.CognitiveServices/accounts@2024-06-01-preview' = { name: '${prefix}-${openaiLocation}-openai' location: 
openaiLocation @@ -784,6 +799,11 @@ resource search 'Microsoft.Search/searchServices@2024-06-01-preview' = { } } +// Search Service Contributor +resource roleSearchContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '7ca78c08-252a-4471-8644-bb5ff32d4ba0' +} + // Search Index Data Reader resource roleSearchDataReader 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { name: '1407120a-92aa-4202-b7e9-c0e197c71c8f' @@ -799,6 +819,16 @@ resource assignmentSearchDataReader 'Microsoft.Authorization/roleAssignments@202 } } +resource assignmentSearchContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(subscription().id, prefix, search.name, 'assignmentSearchContributor') + scope: search + properties: { + principalId: containerApp.identity.principalId + principalType: 'ServicePrincipal' + roleDefinitionId: roleSearchContributor.id + } +} + resource translate 'Microsoft.CognitiveServices/accounts@2024-06-01-preview' = { name: '${prefix}-${location}-translate' location: location diff --git a/cicd/bicep/main.bicep b/cicd/bicep/main.bicep index a50fa518..190d352d 100644 --- a/cicd/bicep/main.bicep +++ b/cicd/bicep/main.bicep @@ -1,19 +1,20 @@ param cognitiveCommunicationLocation string param embeddingDeploymentType string = 'Standard' // Pay-as-you-go in a single region +param embeddingDimensions int = 3072 param embeddingModel string = 'text-embedding-3-large' -param embeddingQuota int = 100 +param embeddingQuota int = 50 param embeddingVersion string = '1' param imageVersion string = 'main' param instance string param llmFastContext int = 128000 param llmFastDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions param llmFastModel string = 'gpt-4o-mini' -param llmFastQuota int = 600 +param llmFastQuota int = 50 param llmFastVersion string = '2024-07-18' param llmSlowContext int = 128000 param llmSlowDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions param 
llmSlowModel string = 'gpt-4o' -param llmSlowQuota int = 300 +param llmSlowQuota int = 50 param llmSlowVersion string = '2024-08-06' param location string = deployment().location param openaiLocation string @@ -47,6 +48,7 @@ module app 'app.bicep' = { params: { cognitiveCommunicationLocation: cognitiveCommunicationLocation embeddingDeploymentType: embeddingDeploymentType + embeddingDimensions: embeddingDimensions embeddingModel: embeddingModel embeddingQuota: embeddingQuota embeddingVersion: embeddingVersion