mongodb-labs · Jibola · Dec 11, 2024 · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025
diff --git a/.evergreen/config.yml b/.evergreen/config.yml
@@ -278,6 +278,7 @@ tasks:
 buildvariants:
   - name: test-semantic-kernel-python-rhel
     display_name: Semantic-Kernel RHEL Python
+    tags: [python]
     expansions:
       DIR: semantic-kernel-python
     run_on:
@@ -290,6 +291,7 @@ buildvariants:
 
   - name: test-semantic-kernel-csharp-rhel
     display_name: Semantic-Kernel RHEL CSharp
+    tags: [csharp]
     expansions:
       DIR: semantic-kernel-csharp
     run_on:
@@ -300,6 +302,7 @@ buildvariants:
 
   - name: test-langchain-python-rhel
     display_name: Langchain RHEL Python
+    tags: [python]
     expansions:
       DIR: langchain-python
     run_on:
@@ -311,6 +314,7 @@ buildvariants:
 
   - name: test-langgraph-python-rhel
     display_name: Langgraph RHEL Python
+    tags: [python]
     expansions:
       DIR: langgraph-python
     run_on:
@@ -321,8 +325,9 @@ buildvariants:
         batchtime: 10080  # 1 week
 
   # TODO: INTPYTHON-668
-  # - name: test-chatgpt-retrieval-plugin-rhel
+  # - name: test-chatgpt-retrieval-plugin-python-rhel
   #   display_name: ChatGPT Retrieval Plugin
+  #   tags: [python]
   #   expansions:
   #     DIR: chatgpt-retrieval-plugin
   #   run_on:
@@ -333,8 +338,9 @@ buildvariants:
   #       batchtime: 10080  # 1 week
 
   # TODO: INTPYTHON-669
-  # - name: test-llama-index-vectorstore-rhel
+  # - name: test-llama-index-vectorstore-python-rhel
   #   display_name: LlamaIndex RHEL Vector Store
+  #   tags: [python]
   #   expansions:
   #     DIR: llama-index-python-vectorstore
   #   run_on:
@@ -345,8 +351,9 @@ buildvariants:
   #     # - name: test-llama-index-remote
   #     #   batchtime: 10080  # 1 week
 
-  - name: test-docarray-rhel
+  - name: test-docarray-python-rhel
     display_name: DocArray RHEL
+    tags: [python]
     expansions:
       DIR: docarray
     run_on:
@@ -355,8 +362,9 @@ buildvariants:
       - name: test-docarray-local
       - name: test-docarray-remote
 
-  - name: test-pymongo-voyageai-rhel
+  - name: test-pymongo-voyageai-python-rhel
     display_name: PyMongo-VoyageAI RHEL
+    tags: [python]
     expansions:
       DIR: pymongo-voyageai
     run_on:
@@ -365,8 +373,9 @@ buildvariants:
       - name: test-pymongo-voyageai-local
       - name: test-pymongo-voyageai-remote
 
-  - name: test-crewai-tools-rhel
+  - name: test-crewai-tools-python-rhel
     display_name: CrewAI-Tools Ubuntu
+    tags: [python]
     expansions:
       DIR: crewai-tools
     run_on:
@@ -375,8 +384,9 @@ buildvariants:
       - name: test-crewai-tools-local
       - name: test-crewai-tools-remote
 
-  - name: test-haystack-embeddings-rhel
+  - name: test-haystack-embeddings-python-rhel
     display_name: Haystack Embeddings RHEL
+    tags: [python]
     expansions:
       DIR: haystack-embeddings
     run_on:
@@ -386,8 +396,9 @@ buildvariants:
       # TODO: INTPYTHON-465
       # - name: test-haystack-embeddings-remote
 
-  - name: test-haystack-fulltext-rhel
+  - name: test-haystack-fulltext-python-rhel
     display_name: Haystack FullText RHEL
+    tags: [python]
     expansions:
       DIR: haystack-fulltext
     run_on:
@@ -397,8 +408,9 @@ buildvariants:
       # TODO: INTPYTHON-465
       # - name: test-haystack-fulltext-remote
 
-  - name: test-langchaingo-ubuntu
+  - name: test-langchaingo-golang-ubuntu
     display_name: LangchainGo Ubuntu2204
+    tags: [golang]
     expansions:
       DIR: langchaingo-golang
     run_on:
@@ -409,6 +421,7 @@ buildvariants:
   # TODO: INTPYTHON-667
   # - name: test-langchain-js-ubuntu
   #   display_name: LangchainJS Ubuntu2204
+  #   tags: [javascript]
   #   expansions:
   #     DIR: langchain-js
   #   run_on:

diff --git a/.evergreen/lint_config.py b/.evergreen/lint_config.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""
+Pre-commit hook to check if buildvariant tasks contain required language tags.
+"""
+
+import logging
+import sys
+import yaml
+from pathlib import Path
+from typing import List, Dict, Any
+
+logging.basicConfig()  # install the default handler on the root logger
+logger = logging.getLogger(__file__)  # logger is named after this script's path
+logger.setLevel(logging.DEBUG)  # lowest threshold this script sets; DEBUG and above are emitted
+
+
+CURRENT_DIR = Path(__file__).parent.resolve()  # directory that contains this script
+CONFIG_YML = CURRENT_DIR / "config.yml"  # the config file loaded and checked by main()
+VALID_LANGUAGES = {"python", "golang", "javascript", "csharp"}  # accepted tags / name markers
+
+
+def load_yaml_file(file_path: str) -> Dict[Any, Any]:
+    """Parse the UTF-8 YAML file at *file_path*; a falsy (e.g. empty) document yields ``{}``."""
+    with open(file_path, mode="r", encoding="utf-8") as stream:
+        return yaml.safe_load(stream) or {}
+
+
+def check_buildvariants(data: Dict[Any, Any]) -> List[str]:
+    """
+    Validate the language tagging of every buildvariant in parsed config data.
+
+    Each buildvariant needs a ``name`` containing ``-<language>-`` and a ``tags``
+    list with exactly one entry of ``VALID_LANGUAGES``; the two are not cross-checked.
+
+    Example buildvariant structure in YAML:
+    buildvariants:
+      - name: test-semantic-kernel-python-rhel
+        display_name: Semantic-Kernel RHEL Python
+        tags: [python]
+        expansions:
+          DIR: semantic-kernel-python
+        run_on:
+          - rhel87-small
+        tasks:
+          - name: test-semantic-kernel-python-local
+          - name: test-semantic-kernel-python-remote
+            batchtime: 10080  # 1 week
+
+    Args:
+        data: Parsed YAML data
+
+    Returns:
+        List of error messages, one per violation (empty when everything passes)
+    """
+    # VALID_LANGUAGES is a set; sort it so error messages read the same every run.
+    languages = ", ".join(sorted(VALID_LANGUAGES))
+    errors = []
+
+    buildvariants = data.get("buildvariants", [])
+    if not isinstance(buildvariants, list):
+        return ["'buildvariants' should be a list"]
+
+    for i, buildvariant in enumerate(buildvariants):
+        if not isinstance(buildvariant, dict):
+            errors.append(f"buildvariants[{i}] should contain sub-fields")
+            continue
+
+        buildvariant_name = buildvariant.get("name", "")
+        if not buildvariant_name:
+            errors.append(f"buildvariants[{i}] is missing 'name'")
+            continue
+        # str() keeps a non-string YAML scalar name from raising on the `in` test.
+        if not any(f"-{lang}-" in str(buildvariant_name) for lang in VALID_LANGUAGES):
+            errors.append(
+                f"buildvariant '{buildvariant_name}' should contain one"
+                f" '-[{languages}]-' in its name, got: {buildvariant_name}"
+            )
+
+        display_name = buildvariant.get("display_name", buildvariant_name)
+        tags = buildvariant.get("tags", [])
+
+        if not isinstance(tags, list) or len(tags) != 1:
+            errors.append(f"'tags' in buildvariant '{display_name}' should be a list of size 1")
+            continue
+        # The isinstance check also keeps an unhashable tag from raising TypeError.
+        if not isinstance(tags[0], str) or tags[0] not in VALID_LANGUAGES:
+            errors.append(
+                f"buildvariant '{display_name}' has invalid tag '{tags[0]}'. "
+                f"Valid tags are: {languages}"
+            )
+    return errors
+
+
+def main() -> int:
+    """Validate CONFIG_YML and return a process exit code (0 = pass, 1 = errors).
+
+    Command-line arguments are not read: the file checked is always CONFIG_YML.
+
+    Raises:
+        FileNotFoundError: if CONFIG_YML is missing or parses to an empty document.
+    """
+    data = load_yaml_file(CONFIG_YML)
+    if not data:
+        raise FileNotFoundError(f"Failed to load or parse {CONFIG_YML}")
+
+    errors = check_buildvariants(data)
+    if not errors:
+        logger.info("✅ %s passed AI/ML testing pipeline validation", CONFIG_YML)
+        return 0  # explicit success code rather than an implicit None
+
+    logger.error("❌ Errors found in %s:", CONFIG_YML)
+    for error in errors:
+        logger.error("  - %s", error)
+    logger.error("❌ Total errors found: %s", len(errors))
+    return 1
+
+if __name__ == "__main__":
+    sys.exit(main())  # main()'s return value becomes the process exit status
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -23,7 +23,7 @@ jobs:
           python-version: '3.10'
       - name: Install Python dependencies
         run: |
-          python -m pip install -U pip pre-commit
+          python -m pip install -U pip pre-commit pyyaml
       - name: Run linters
         run: |
           pre-commit run --hook-stage=manual --all-files
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -53,3 +53,12 @@ repos:
     rev: "v2.2.6"
     hooks:
       - id: codespell
+
+  - repo: local
+    hooks:
+      - id: check-buildvariant-tags
+        name: Check buildvariant language tags
+        entry: python3 .evergreen/lint_config.py
+        language: system
+        files: .evergreen/config.yml
+        args: ['--languages=python,golang,javascript,csharp']
diff --git a/README.md b/README.md
@@ -110,6 +110,8 @@ Test execution flow is defined in `.evergreen/config.yml`. The test pipeline's c
 - `run_on` -- Specified platform to run on. `rhel87-small` or `ubuntu2204-small` should be used by default. Any other distro may fail Atlas CLI setup.
 - `tasks` -- Tasks to run. See below for more details
 - `cron` -- The tests are run via a cron job on a nightly cadence. This can be modified by setting a different cadence. Cron jobs can be scheduled using [cron syntax](https://crontab.guru/#0_0_*_*_*)
+- `tags` -- The language the AI/ML integration is run in. Must contain exactly one of `python`, `csharp`, `golang`, or `javascript`. The tagged language determines which
+language-specific Slack channel is populated with the buildvariant's notifications.
 
 **[Tasks](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Configuration-Files#tasks)** -- These are the "building blocks" of our runs. Here is where we consolidate the specific set of functions. The basic parameters to add are shown below
 
@@ -185,8 +187,10 @@ evergreen patch -p ai-ml-pipeline-testing --param REPO_ORG=caseyclements --param
 
 ### Handling Failing Tests
 
-If tests are found to be failing, and cannot be addressed quickly, the responsible team MUST create a JIRA ticket, and disable the relevant tests
+Tests are run periodically (nightly) and any failures will propagate into both the `dbx-ai-ml-testing-pipeline-notifications` and `dbx-ai-ml-testing-pipeline-notifications-{language}` channels. Repo owners of this `ai-ml-testing-pipeline` library are required to join `dbx-ai-ml-testing-pipeline-notifications`. Pipeline-specific implementers must **at least** join `dbx-ai-ml-testing-pipeline-notifications-{language}` (e.g. whoever implemented `langchain-js` must at least be a member of `dbx-ai-ml-testing-pipeline-notifications-js`).
+
+If tests are found to be failing, and cannot be addressed quickly, the responsible team MUST create a JIRA ticket within their team's project (e.g. a python failure should generate an `INTPYTHON` ticket), and disable the relevant tests
 in the `config.yml` file, with a comment about the JIRA ticket that will address it.
 
-This policy will help ensure that a single failing integration does not cause noise in the `dbx-ai-ml-testing-pipeline-notifications` that would mask other
+This policy will help ensure that a single failing integration does not cause noise in the `dbx-ai-ml-testing-pipeline-notifications` or `dbx-ai-ml-testing-pipeline-notifications-{language}` channels that would mask other
 failures.