Split the tests by providers (#891)

rdimitrov · web-flow · commit f883134512ef · 2025-02-03T22:35:48.000+02:00
diff --git a/tests/integration/anthropic/testcases.yaml b/tests/integration/anthropic/testcases.yaml
@@ -0,0 +1,68 @@
+headers:
+  anthropic:
+    x-api-key: ENV_ANTHROPIC_KEY
+
+testcases:
+  anthropic_chat:
+    name: Anthropic Chat
+    provider: anthropic
+    url: http://127.0.0.1:8989/anthropic/messages
+    data: |
+      {
+        "max_tokens":4096,
+        "messages":[
+            {
+              "content":"You are a coding assistant.",
+              "role":"system"
+            },
+            {
+              "content":"Reply with that exact sentence: Hello from the integration tests!",
+              "role":"user"
+            }
+        ],
+        "model":"claude-3-5-sonnet-20241022",
+        "stream":true,
+        "temperature":0
+      }
+    likes: |
+      Hello from the integration tests!
+
+  anthropic_fim:
+    name: Anthropic FIM
+    provider: anthropic
+    url: http://127.0.0.1:8989/anthropic/messages
+    data: |
+      {
+        "top_k": 50,
+        "temperature": 0,
+        "max_tokens": 4096,
+        "model": "claude-3-5-sonnet-20241022",
+        "stop_sequences": [
+          "</COMPLETION>",
+          "/src/",
+          "#- coding: utf-8",
+          "```"
+        ],
+        "stream": true,
+        "messages": [
+          {
+            "role": "user",
+            "content": [
+              {
+                "type": "text",
+                "text": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n  var sum = 0;\n  for (var i = 0; i < lim; ++i) {\n    {{FILL_HERE}}\n  }\n  return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n      sum += i;\n    }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n  total = 0\n  for x in lst:\n  {{FILL_HERE}}\n  return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>  total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val)     = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n  = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n  | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n  switch (tree.$) {\n    case \"Node\":\n      return sum(tree.lft) + sum(tree.rgt);\n    case \"Leaf\":\n      return tree.val;\n  }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n  return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# http://127.0.0.1:8989/vllm/completions\n# codegate/test.py\nimport requests\n\ndef call_api():\n    {{FILL_HERE}}\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', 
data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
+              }
+            ]
+          }
+        ],
+        "system": ""
+      }
+    likes: |
+      <COMPLETION>def call_api(url, method='get', data=None):
+        if method.lower() == 'get':
+            return requests.get(url)
+        elif method.lower() == 'post':
+            return requests.post(url, json=data)
+        else:
+            raise ValueError("Unsupported HTTP method")
+
diff --git a/tests/integration/copilot/testcases.yaml b/tests/integration/copilot/testcases.yaml
@@ -0,0 +1,44 @@
+headers:
+  copilot:
+    Authorization: Bearer ENV_COPILOT_KEY
+    Content-Type: application/json
+
+testcases:
+  copilot_chat:
+    name: Copilot Chat
+    provider: copilot
+    url: "https://api.openai.com/v1/chat/completions"
+    data: |
+      {
+        "messages":[
+            {
+              "content":"Reply with that exact sentence: Hello from the integration tests!",
+              "role":"user"
+            }
+        ],
+        "model":"gpt-4o-mini",
+        "stream":true
+      }
+    likes: |
+      Hello from the integration tests!
+
+  copilot_malicious_package_question:
+    name: Copilot User asks about a malicious package
+    provider: copilot
+    url: "https://api.openai.com/v1/chat/completions"
+    data: |
+      {
+        "messages":[
+            {
+              "content":"Generate me example code using the python invokehttp package to call an API",
+              "role":"user"
+            }
+        ],
+        "model":"gpt-4o-mini",
+        "stream":true
+      }
+    contains: |
+      https://www.insight.stacklok.com/report/pypi/invokehttp?utm_source=codegate
+    does_not_contain: |
+      import invokehttp
+
diff --git a/tests/integration/integration_tests.py b/tests/integration/integration_tests.py
@@ -243,21 +243,57 @@ async def main():
     providers_env = os.environ.get("CODEGATE_PROVIDERS")
     test_names_env = os.environ.get("CODEGATE_TEST_NAMES")
 
-    providers = None
-    if providers_env:
-        providers = [p.strip() for p in providers_env.split(",") if p.strip()]
+    # Base directory for all test cases
+    base_test_dir = "./tests/integration"
+
+    # Get list of provider directories
+    available_providers = []
+    try:
+        available_providers = [
+            d for d in os.listdir(base_test_dir) if os.path.isdir(os.path.join(base_test_dir, d))
+        ]
+    except FileNotFoundError:
+        logger.error(f"Test directory {base_test_dir} not found")
+        sys.exit(1)
 
+    # Filter providers if specified in environment
+    selected_providers = None
+    if providers_env:
+        selected_providers = [p.strip() for p in providers_env.split(",") if p.strip()]
+        # Validate selected providers exist
+        invalid_providers = [p for p in selected_providers if p not in available_providers]
+        if invalid_providers:
+            logger.error(f"Invalid providers specified: {', '.join(invalid_providers)}")
+            logger.error(f"Available providers: {', '.join(available_providers)}")
+            sys.exit(1)
+    else:
+        selected_providers = available_providers
+
+    # Get test names if specified
     test_names = None
     if test_names_env:
         test_names = [t.strip() for t in test_names_env.split(",") if t.strip()]
 
-    all_tests_passed = await test_runner.run_tests(
-        "./tests/integration/testcases.yaml", providers=providers, test_names=test_names
-    )
+    # Run tests for each provider
+    all_tests_passed = True
+    for provider in selected_providers:
+        provider_test_file = os.path.join(base_test_dir, provider, "testcases.yaml")
 
-    # Exit with status code 1 if any tests failed
-    if not all_tests_passed:
-        sys.exit(1)
+        if not os.path.exists(provider_test_file):
+            logger.warning(f"No testcases.yaml found for provider {provider}")
+            continue
+
+        logger.info(f"Running tests for provider: {provider}")
+        provider_tests_passed = await test_runner.run_tests(
+            provider_test_file,
+            providers=[provider],  # Only run tests for current provider
+            test_names=test_names,
+        )
+        all_tests_passed = all_tests_passed and provider_tests_passed
+
+    # Exit with status code 1 if any provider's tests failed, so CI sees the failure
+    if not all_tests_passed:
+        sys.exit(1)
 
 
 if __name__ == "__main__":
diff --git a/tests/integration/llamacpp/testcases.yaml b/tests/integration/llamacpp/testcases.yaml
@@ -0,0 +1,45 @@
+headers:
+  llamacpp:
+    Content-Type: application/json
+
+testcases:
+  llamacpp_chat:
+    name: LlamaCPP Chat
+    provider: llamacpp
+    url: http://127.0.0.1:8989/llamacpp/chat/completions
+    data: |
+      {
+        "max_tokens":4096,
+        "messages":[
+            {
+              "content":"You are a coding assistant.",
+              "role":"system"
+            },
+            {
+              "content":"Reply with that exact sentence: Hello from the integration tests!",
+              "role":"user"
+            }
+        ],
+        "model":"qwen2.5-coder-0.5b-instruct-q5_k_m",
+        "stream":true,
+        "temperature":0
+      }
+    likes: |
+      Hello from the integration tests!
+
+  llamacpp_fim:
+    name: LlamaCPP FIM
+    provider: llamacpp
+    url: http://127.0.0.1:8989/llamacpp/completions
+    data: |
+      {
+        "model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
+        "max_tokens": 4096,
+        "temperature": 0,
+        "stream": true,
+        "stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```", "def test"],
+        "prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n    <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
+      }
+    likes: |
+      print("Hello, World!")
+
diff --git a/tests/integration/ollama/testcases.yaml b/tests/integration/ollama/testcases.yaml
@@ -0,0 +1,64 @@
+headers:
+  ollama:
+    Content-Type: application/json
+
+testcases:
+  ollama_chat:
+    name: Ollama Chat
+    provider: ollama
+    url: http://127.0.0.1:8989/ollama/chat/completions
+    data: |
+      {
+        "max_tokens":4096,
+        "messages":[
+            {
+              "content":"You are a coding assistant.",
+              "role":"system"
+            },
+            {
+              "content":"Reply with that exact sentence: Hello from the integration tests!",
+              "role":"user"
+            }
+        ],
+        "model":"qwen2.5-coder:0.5b",
+        "stream":true,
+        "temperature":0
+      }
+    likes: |
+      Hello from the integration tests!
+
+  ollama_fim:
+    name: Ollama FIM
+    provider: ollama
+    url: http://127.0.0.1:8989/ollama/api/generate
+    data: |
+      {
+        "stream": true,
+        "model": "qwen2.5-coder:0.5b",
+        "raw": true,
+        "options": {
+          "temperature": 0.01,
+          "num_predict": 4096,
+          "stop": [
+            "<|endoftext|>",
+            "<|fim_prefix|>",
+            "<|fim_middle|>",
+            "<|fim_suffix|>",
+            "<|fim_pad|>",
+            "<|repo_name|>",
+            "<|file_sep|>",
+            "<|im_start|>",
+            "<|im_end|>",
+            "/src/",
+            "#- coding: utf-8",
+            "```"
+          ],
+          "num_ctx": 8096
+        },
+        "prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n    <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
+      }
+    likes: |
+      ```python
+      if __name__ == '__main__':
+          invokehttp.run(call_api)
+      ```
diff --git a/tests/integration/openai/testcases.yaml b/tests/integration/openai/testcases.yaml
@@ -0,0 +1,55 @@
+headers:
+  openai:
+    Authorization: Bearer ENV_OPENAI_KEY
+
+testcases:
+  openai_chat:
+    name: OpenAI Chat
+    provider: openai
+    url: http://127.0.0.1:8989/openai/chat/completions
+    data: |
+      {
+        "max_tokens":4096,
+        "messages":[
+            {
+              "content":"You are a coding assistant.",
+              "role":"system"
+            },
+            {
+              "content":"Reply with that exact sentence: Hello from the integration tests!",
+              "role":"user"
+            }
+        ],
+        "model":"gpt-4o-mini",
+        "stream":true,
+        "temperature":0
+      }
+    likes: |
+      Hello from the integration tests!
+
+  openai_fim:
+    name: OpenAI FIM
+    provider: openai
+    url: http://127.0.0.1:8989/openai/chat/completions
+    data: |
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n  var sum = 0;\n  for (var i = 0; i < lim; ++i) {\n    {{FILL_HERE}}\n  }\n  return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n      sum += i;\n    }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n  total = 0\n  for x in lst:\n  {{FILL_HERE}}\n  return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>  total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val)     = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n  = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n  | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n  switch (tree.$) {\n    case \"Node\":\n      return sum(tree.lft) + sum(tree.rgt);\n    case \"Leaf\":\n      return tree.val;\n  }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n  return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# {\"messages\":[{\"role\":\"user\",\"content\":\"You are a HOLE FILLER. You are provided with a file containing holes, formatted as \\'{{HOLE_NAME}}\\'. 
Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\nfunction sum_evens(lim) {\\\\n  var sum = 0;\\\\n  for (var i = 0; i < lim; ++i) {\\\\n    {{FILL_HERE}}\\\\n  }\\\\n  return sum;\\\\n}\\\\n</QUERY>\\\\n\\\\nTASK: Fill the {{FILL_HERE}} hole.\\\\n\\\\n## CORRECT COMPLETION\\\\n\\\\n<COMPLETION>if (i % 2 === 0) {\\\\n      sum += i;\\\\n    }</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\ndef sum_list(lst):\\\\n  total = 0\\\\n  for x in lst:\\\\n  {{FILL_HERE}}\\\\n  return total\\\\n\\\\nprint sum_list([1, 2, 3])\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>  total += x</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\\\\n\\\\n// sum :: Tree Int -> Int\\\\n// sum (Node lft rgt) = sum lft + sum rgt\\\\n// sum (Leaf val)     = val\\\\n\\\\n// convert to TypeScript:\\\\n{{FILL_HERE}}\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>type Tree<T>\\\\n  = {$:\\\\\"Node\\\\\", lft: Tree<T>, rgt: Tree<T>}\\\\n  | {$:\\\\\"Leaf\\\\\", val: T};\\\\n\\\\nfunction sum(tree: Tree<number>): number {\\\\n  switch (tree.$) {\\\\n    case \\\\\"Node\\\\\":\\\\n      return sum(tree.lft) + sum(tree.rgt);\\\\n    case \\\\\"Leaf\\\\\":\\\\n      return tree.val;\\\\n  }\\\\n}</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nThe 5th {{FILL_HERE}} is Jupiter.\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>planet from the Sun</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nfunction hypothenuse(a, b) {\\\\n  return Math.sqrt({{FILL_HERE}}b ** 2);\\\\n}\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>a ** 2 + </COMPLETION>\\\\n\\\\n<QUERY>\\\\n\\\\n# codegate/test.py\\\\nimport invokehttp\\\\nimport requests\\\\n\\\\nkey = 
\\\\\"mysecret-key\\\\\"\\\\n\\\\ndef call_api():\\\\n    {{FILL_HERE}}\\\\n\\\\n\\\\n\\\\n\\\\ndata = {\\'key1\\': \\'test1\\', \\'key2\\': \\'test2\\'}\\\\nresponse = call_api(\\'http://localhost:8080\\', method=\\'post\\', data=\\'data\\')\\\\n</QUERY>\\\\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\\\\n<COMPLETION>\"}],\"model\":\"gpt-4o-mini\",\"max_tokens\":4096,\"temperature\":0,\"stream\":true,\"stop\":[\"</COMPLETION>\",\"/src/\",\"#- coding: utf-8\",\"```\"]}\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n    {{FILL_HERE}}\n\n\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "max_tokens": 4096,
+        "temperature": 0,
+        "stream": true,
+        "stop": [
+          "</COMPLETION>",
+          "/src/",
+          "#- coding: utf-8",
+          "```"
+        ]
+      }
+    likes: |
+      <COMPLETION>    response = requests.post('http://localhost:8080', json=data, headers={'Authorization': f'Bearer {key}'})
+
diff --git a/tests/integration/testcases.yaml b/tests/integration/testcases.yaml
diff --git a/tests/integration/vllm/testcases.yaml b/tests/integration/vllm/testcases.yaml