From 053aa5d10098cd67d9d925157342bc193f907013 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= <clemence@lesne.pro>
Date: Fri, 25 Oct 2024 19:21:18 +0200
Subject: [PATCH] breaking: Automatically create AI Search index

The default semantic configuration name changed from `default` to `semantic-default`; this can cause query issues on existing indexes that only define `default`.
---
 README.md                              |  11 +-
 app/helpers/config_models/ai_search.py |   6 +-
 app/models/training.py                 |   1 +
 app/persistence/ai_search.py           | 147 +++++++++++++++++++++++--
 cicd/bicep/app.bicep                   |  30 +++++
 cicd/bicep/main.bicep                  |   8 +-
 6 files changed, 183 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 97a8feee..602da9c1 100644
--- a/README.md
+++ b/README.md
@@ -357,12 +357,7 @@ make deploy name=my-rg-name
 
 - Wait for the deployment to finish
 
-#### 4. [Create a AI Search resource](https://learn.microsoft.com/en-us/azure/search/search-create-service-portal)
-
-- An index named `trainings`
-- A semantic search configuration on the index named `default`
-
-#### 5. Get the logs
+#### 4. Get the logs
 
 ```zsh
 make logs name=my-rg-name
@@ -430,6 +425,10 @@ llm:
       streaming: true
 
 ai_search:
+  embedding_deployment: text-embedding-3-large-1
+  embedding_dimensions: 3072
+  embedding_endpoint: https://xxx.openai.azure.com
+  embedding_model: text-embedding-3-large
   endpoint: https://xxx.search.windows.net
   index: trainings
 
diff --git a/app/helpers/config_models/ai_search.py b/app/helpers/config_models/ai_search.py
index 2ee9a6a8..b4ba08a6 100644
--- a/app/helpers/config_models/ai_search.py
+++ b/app/helpers/config_models/ai_search.py
@@ -6,10 +6,14 @@
 
 
 class AiSearchModel(BaseModel, frozen=True):
+    embedding_deployment: str
+    embedding_dimensions: int
+    embedding_endpoint: str
+    embedding_model: str
     endpoint: str
     expansion_n_messages: int = Field(default=10, ge=1)
     index: str
-    semantic_configuration: str = "default"
+    semantic_configuration: str = "semantic-default"
     strictness: float = Field(default=2, ge=0, le=5)
     top_n_documents: int = Field(default=5, ge=1)
 
diff --git a/app/models/training.py b/app/models/training.py
index 29a0a274..2352a89e 100644
--- a/app/models/training.py
+++ b/app/models/training.py
@@ -13,6 +13,7 @@ class TrainingModel(BaseModel, frozen=True):
     content: str
     id: UUID
     score: float
+    title: str
 
     def __hash__(self) -> int:
         return self.id.__hash__()
diff --git a/app/persistence/ai_search.py b/app/persistence/ai_search.py
index 83b6fb6a..1b5879b9 100644
--- a/app/persistence/ai_search.py
+++ b/app/persistence/ai_search.py
@@ -1,10 +1,29 @@
 from azure.core.exceptions import (
     HttpResponseError,
+    ResourceExistsError,
     ResourceNotFoundError,
     ServiceRequestError,
     ServiceResponseError,
 )
 from azure.search.documents.aio import SearchClient
+from azure.search.documents.indexes.aio import SearchIndexClient
+from azure.search.documents.indexes.models import (
+    AzureOpenAIParameters,
+    AzureOpenAIVectorizer,
+    HnswAlgorithmConfiguration,
+    LexicalAnalyzerName,
+    SearchableField,
+    SearchField,
+    SearchFieldDataType,
+    SearchIndex,
+    SemanticConfiguration,
+    SemanticField,
+    SemanticPrioritizedFields,
+    SemanticSearch,
+    SimpleField,
+    VectorSearch,
+    VectorSearchProfile,
+)
 from azure.search.documents.models import (
     HybridCountAndFacetMode,
     HybridSearch,
@@ -156,14 +175,122 @@ async def training_asearch_all(
         return trainings or None
 
     async def _use_client(self) -> SearchClient:
-        if not self._client:
-            self._client = SearchClient(
-                # Deployment
-                endpoint=self._config.endpoint,
-                index_name=self._config.index,
-                # Performance
-                transport=await azure_transport(),
-                # Authentication
-                credential=await credential(),
-            )
+        """
+        Get the search client.
+
+        If the index does not exist, it is created with its vector and semantic
+        search configuration. The client is cached after the first call.
+        """
+        if self._client:
+            return self._client
+
+        # Index configuration
+        fields = [
+            # Required field for indexing key
+            SimpleField(
+                name="id",
+                key=True,
+                type=SearchFieldDataType.String,
+            ),
+            # Custom fields
+            SearchableField(
+                analyzer_name=LexicalAnalyzerName.STANDARD_LUCENE,
+                name="content",
+                type=SearchFieldDataType.String,
+            ),
+            SearchableField(
+                analyzer_name=LexicalAnalyzerName.STANDARD_LUCENE,
+                name="title",
+                type=SearchFieldDataType.String,
+            ),
+            SearchField(
+                name="vectors",
+                searchable=True,
+                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
+                vector_search_dimensions=self._config.embedding_dimensions,
+                vector_search_profile_name="profile-default",
+            ),
+        ]
+        vector_search = VectorSearch(
+            profiles=[
+                VectorSearchProfile(
+                    algorithm_configuration_name="algorithm-default",
+                    name="profile-default",
+                    vectorizer="vectorizer-default",
+                ),
+            ],
+            algorithms=[
+                HnswAlgorithmConfiguration(
+                    name="algorithm-default",
+                ),
+            ],
+            vectorizers=[
+                AzureOpenAIVectorizer(
+                    name="vectorizer-default",
+                    # Without credentials specified, the database will use its system managed identity
+                    azure_open_ai_parameters=AzureOpenAIParameters(
+                        deployment_id=self._config.embedding_deployment,
+                        model_name=self._config.embedding_model,
+                        resource_uri=self._config.embedding_endpoint,
+                    ),
+                )
+            ],
+        )
+        semantic_search = SemanticSearch(
+            default_configuration_name=self._config.semantic_configuration,
+            configurations=[
+                SemanticConfiguration(
+                    name=self._config.semantic_configuration,
+                    prioritized_fields=SemanticPrioritizedFields(
+                        title_field=SemanticField(
+                            field_name="title",
+                        ),
+                        content_fields=[
+                            SemanticField(
+                                field_name="content",
+                            ),
+                        ],
+                    ),
+                ),
+            ],
+        )
+
+        # Create index if it does not exist
+        async with SearchIndexClient(
+            # Deployment
+            endpoint=self._config.endpoint,
+            # Performance
+            transport=await azure_transport(),
+            # Authentication
+            credential=await credential(),
+        ) as client:
+            try:
+                await client.create_index(
+                    SearchIndex(
+                        fields=fields,
+                        name=self._config.index,
+                        # Without it, the semantic configuration is never defined on the index
+                        semantic_search=semantic_search,
+                        vector_search=vector_search,
+                    )
+                )
+                logger.info('Created Search "%s"', self._config.index)
+            except ResourceExistsError:
+                # Index already exists, nothing to do
+                pass
+            except HttpResponseError as e:
+                # Tolerate only the "already in use" error code, re-raise anything else
+                if not e.error or e.error.code != "ResourceNameAlreadyInUse":
+                    raise
+
+        # Return client
+        self._client = SearchClient(
+            # Deployment
+            endpoint=self._config.endpoint,
+            index_name=self._config.index,
+            # Performance
+            transport=await azure_transport(),
+            # Authentication
+            credential=await credential(),
+        )
         return self._client
diff --git a/cicd/bicep/app.bicep b/cicd/bicep/app.bicep
index ae3a8eca..bb34bff2 100644
--- a/cicd/bicep/app.bicep
+++ b/cicd/bicep/app.bicep
@@ -1,5 +1,6 @@
 param cognitiveCommunicationLocation string
 param embeddingDeploymentType string
+param embeddingDimensions int
 param embeddingModel string
 param embeddingQuota int
 param embeddingVersion string
@@ -91,6 +92,10 @@ var config = {
     }
   }
   ai_search: {
+    embedding_deployment: embedding.name
+    embedding_dimensions: embeddingDimensions
+    embedding_endpoint: cognitiveOpenai.properties.endpoint
+    embedding_model: embeddingModel
     endpoint: 'https://${search.name}.search.windows.net'
     index: 'trainings'
   }
@@ -494,6 +499,16 @@ resource assignmentsContainerAppOpenaiContributor 'Microsoft.Authorization/roleA
   }
 }
 
+resource assignmentSearchOpenaiContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(subscription().id, prefix, cognitiveOpenai.name, 'assignmentSearchOpenaiContributor')
+  scope: cognitiveOpenai
+  properties: {
+    principalId: search.identity.principalId
+    principalType: 'ServicePrincipal'
+    roleDefinitionId: roleOpenaiContributor.id
+  }
+}
+
 resource cognitiveOpenai 'Microsoft.CognitiveServices/accounts@2024-06-01-preview' = {
   name: '${prefix}-${openaiLocation}-openai'
   location: openaiLocation
@@ -784,6 +799,11 @@ resource search 'Microsoft.Search/searchServices@2024-06-01-preview' = {
   }
 }
 
+// Search Service Contributor
+resource roleSearchContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = {
+  name: '7ca78c08-252a-4471-8644-bb5ff32d4ba0'
+}
+
 // Search Index Data Reader
 resource roleSearchDataReader 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = {
   name: '1407120a-92aa-4202-b7e9-c0e197c71c8f'
@@ -799,6 +819,16 @@ resource assignmentSearchDataReader 'Microsoft.Authorization/roleAssignments@202
   }
 }
 
+resource assignmentSearchContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(subscription().id, prefix, search.name, 'assignmentSearchContributor')
+  scope: search
+  properties: {
+    principalId: containerApp.identity.principalId
+    principalType: 'ServicePrincipal'
+    roleDefinitionId: roleSearchContributor.id
+  }
+}
+
 resource translate 'Microsoft.CognitiveServices/accounts@2024-06-01-preview' = {
   name: '${prefix}-${location}-translate'
   location: location
diff --git a/cicd/bicep/main.bicep b/cicd/bicep/main.bicep
index a50fa518..190d352d 100644
--- a/cicd/bicep/main.bicep
+++ b/cicd/bicep/main.bicep
@@ -1,19 +1,20 @@
 param cognitiveCommunicationLocation string
 param embeddingDeploymentType string = 'Standard' // Pay-as-you-go in a single region
+param embeddingDimensions int = 3072
 param embeddingModel string = 'text-embedding-3-large'
-param embeddingQuota int = 100
+param embeddingQuota int = 50
 param embeddingVersion string = '1'
 param imageVersion string = 'main'
 param instance string
 param llmFastContext int = 128000
 param llmFastDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions
 param llmFastModel string = 'gpt-4o-mini'
-param llmFastQuota int = 600
+param llmFastQuota int = 50
 param llmFastVersion string = '2024-07-18'
 param llmSlowContext int = 128000
 param llmSlowDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions
 param llmSlowModel string = 'gpt-4o'
-param llmSlowQuota int = 300
+param llmSlowQuota int = 50
 param llmSlowVersion string = '2024-08-06'
 param location string = deployment().location
 param openaiLocation string
@@ -47,6 +48,7 @@ module app 'app.bicep' = {
   params: {
     cognitiveCommunicationLocation: cognitiveCommunicationLocation
     embeddingDeploymentType: embeddingDeploymentType
+    embeddingDimensions: embeddingDimensions
     embeddingModel: embeddingModel
     embeddingQuota: embeddingQuota
     embeddingVersion: embeddingVersion