Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit f883134

Browse files
authored
Split the tests by providers (#891)
1 parent c5756e6 commit f883134

File tree

8 files changed

+382
-335
lines changed

8 files changed

+382
-335
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
headers:
2+
anthropic:
3+
x-api-key: ENV_ANTHROPIC_KEY
4+
5+
testcases:
6+
anthropic_chat:
7+
name: Anthropic Chat
8+
provider: anthropic
9+
url: http://127.0.0.1:8989/anthropic/messages
10+
data: |
11+
{
12+
"max_tokens":4096,
13+
"messages":[
14+
{
15+
"content":"You are a coding assistant.",
16+
"role":"system"
17+
},
18+
{
19+
"content":"Reply with that exact sentence: Hello from the integration tests!",
20+
"role":"user"
21+
}
22+
],
23+
"model":"claude-3-5-sonnet-20241022",
24+
"stream":true,
25+
"temperature":0
26+
}
27+
likes: |
28+
Hello from the integration tests!
29+
30+
anthropic_fim:
31+
name: Anthropic FIM
32+
provider: anthropic
33+
url: http://127.0.0.1:8989/anthropic/messages
34+
data: |
35+
{
36+
"top_k": 50,
37+
"temperature": 0,
38+
"max_tokens": 4096,
39+
"model": "claude-3-5-sonnet-20241022",
40+
"stop_sequences": [
41+
"</COMPLETION>",
42+
"/src/",
43+
"#- coding: utf-8",
44+
"```"
45+
],
46+
"stream": true,
47+
"messages": [
48+
{
49+
"role": "user",
50+
"content": [
51+
{
52+
"type": "text",
53+
"text": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n sum += i;\n }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION> total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# http://127.0.0.1:8989/vllm/completions\n# codegate/test.py\nimport requests\n\ndef call_api():\n {{FILL_HERE}}\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
54+
}
55+
]
56+
}
57+
],
58+
"system": ""
59+
}
60+
likes: |
61+
<COMPLETION>def call_api(url, method='get', data=None):
62+
if method.lower() == 'get':
63+
return requests.get(url)
64+
elif method.lower() == 'post':
65+
return requests.post(url, json=data)
66+
else:
67+
raise ValueError("Unsupported HTTP method")
68+
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
headers:
2+
copilot:
3+
Authorization: Bearer ENV_COPILOT_KEY
4+
Content-Type: application/json
5+
6+
testcases:
7+
copilot_chat:
8+
name: Copilot Chat
9+
provider: copilot
10+
url: "https://api.openai.com/v1/chat/completions"
11+
data: |
12+
{
13+
"messages":[
14+
{
15+
"content":"Reply with that exact sentence: Hello from the integration tests!",
16+
"role":"user"
17+
}
18+
],
19+
"model":"gpt-4o-mini",
20+
"stream":true
21+
}
22+
likes: |
23+
Hello from the integration tests!
24+
25+
copilot_malicious_package_question:
26+
name: Copilot User asks about a malicious package
27+
provider: copilot
28+
url: "https://api.openai.com/v1/chat/completions"
29+
data: |
30+
{
31+
"messages":[
32+
{
33+
"content":"Generate me example code using the python invokehttp package to call an API",
34+
"role":"user"
35+
}
36+
],
37+
"model":"gpt-4o-mini",
38+
"stream":true
39+
}
40+
contains: |
41+
https://www.insight.stacklok.com/report/pypi/invokehttp?utm_source=codegate
42+
does_not_contain: |
43+
import invokehttp
44+

tests/integration/integration_tests.py

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -243,21 +243,53 @@ async def main():
243243
providers_env = os.environ.get("CODEGATE_PROVIDERS")
244244
test_names_env = os.environ.get("CODEGATE_TEST_NAMES")
245245

246-
providers = None
247-
if providers_env:
248-
providers = [p.strip() for p in providers_env.split(",") if p.strip()]
246+
# Base directory for all test cases
247+
base_test_dir = "./tests/integration"
248+
249+
# Get list of provider directories
250+
available_providers = []
251+
try:
252+
available_providers = [
253+
d for d in os.listdir(base_test_dir) if os.path.isdir(os.path.join(base_test_dir, d))
254+
]
255+
except FileNotFoundError:
256+
logger.error(f"Test directory {base_test_dir} not found")
257+
sys.exit(1)
249258

259+
# Filter providers if specified in environment
260+
selected_providers = None
261+
if providers_env:
262+
selected_providers = [p.strip() for p in providers_env.split(",") if p.strip()]
263+
# Validate selected providers exist
264+
invalid_providers = [p for p in selected_providers if p not in available_providers]
265+
if invalid_providers:
266+
logger.error(f"Invalid providers specified: {', '.join(invalid_providers)}")
267+
logger.error(f"Available providers: {', '.join(available_providers)}")
268+
sys.exit(1)
269+
else:
270+
selected_providers = available_providers
271+
272+
# Get test names if specified
250273
test_names = None
251274
if test_names_env:
252275
test_names = [t.strip() for t in test_names_env.split(",") if t.strip()]
253276

254-
all_tests_passed = await test_runner.run_tests(
255-
"./tests/integration/testcases.yaml", providers=providers, test_names=test_names
256-
)
277+
# Run tests for each provider
278+
all_tests_passed = True
279+
for provider in selected_providers:
280+
provider_test_file = os.path.join(base_test_dir, provider, "testcases.yaml")
257281

258-
# Exit with status code 1 if any tests failed
259-
if not all_tests_passed:
260-
sys.exit(1)
282+
if not os.path.exists(provider_test_file):
283+
logger.warning(f"No testcases.yaml found for provider {provider}")
284+
continue
285+
286+
logger.info(f"Running tests for provider: {provider}")
287+
provider_tests_passed = await test_runner.run_tests(
288+
provider_test_file,
289+
providers=[provider], # Only run tests for current provider
290+
test_names=test_names,
291+
)
292+
all_tests_passed = all_tests_passed and provider_tests_passed
261293

262294

263295
if __name__ == "__main__":
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
headers:
2+
llamacpp:
3+
Content-Type: application/json
4+
5+
testcases:
6+
llamacpp_chat:
7+
name: LlamaCPP Chat
8+
provider: llamacpp
9+
url: http://127.0.0.1:8989/llamacpp/chat/completions
10+
data: |
11+
{
12+
"max_tokens":4096,
13+
"messages":[
14+
{
15+
"content":"You are a coding assistant.",
16+
"role":"system"
17+
},
18+
{
19+
"content":"Reply with that exact sentence: Hello from the integration tests!",
20+
"role":"user"
21+
}
22+
],
23+
"model":"qwen2.5-coder-0.5b-instruct-q5_k_m",
24+
"stream":true,
25+
"temperature":0
26+
}
27+
likes: |
28+
Hello from the integration tests!
29+
30+
llamacpp_fim:
31+
name: LlamaCPP FIM
32+
provider: llamacpp
33+
url: http://127.0.0.1:8989/llamacpp/completions
34+
data: |
35+
{
36+
"model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
37+
"max_tokens": 4096,
38+
"temperature": 0,
39+
"stream": true,
40+
"stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```", "def test"],
41+
"prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
42+
}
43+
likes: |
44+
print("Hello, World!")
45+
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
headers:
2+
ollama:
3+
Content-Type: application/json
4+
5+
testcases:
6+
ollama_chat:
7+
name: Ollama Chat
8+
provider: ollama
9+
url: http://127.0.0.1:8989/ollama/chat/completions
10+
data: |
11+
{
12+
"max_tokens":4096,
13+
"messages":[
14+
{
15+
"content":"You are a coding assistant.",
16+
"role":"system"
17+
},
18+
{
19+
"content":"Reply with that exact sentence: Hello from the integration tests!",
20+
"role":"user"
21+
}
22+
],
23+
"model":"qwen2.5-coder:0.5b",
24+
"stream":true,
25+
"temperature":0
26+
}
27+
likes: |
28+
Hello from the integration tests!
29+
30+
ollama_fim:
31+
name: Ollama FIM
32+
provider: ollama
33+
url: http://127.0.0.1:8989/ollama/api/generate
34+
data: |
35+
{
36+
"stream": true,
37+
"model": "qwen2.5-coder:0.5b",
38+
"raw": true,
39+
"options": {
40+
"temperature": 0.01,
41+
"num_predict": 4096,
42+
"stop": [
43+
"<|endoftext|>",
44+
"<|fim_prefix|>",
45+
"<|fim_middle|>",
46+
"<|fim_suffix|>",
47+
"<|fim_pad|>",
48+
"<|repo_name|>",
49+
"<|file_sep|>",
50+
"<|im_start|>",
51+
"<|im_end|>",
52+
"/src/",
53+
"#- coding: utf-8",
54+
"```"
55+
],
56+
"num_ctx": 8096
57+
},
58+
"prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
59+
}
60+
likes: |
61+
```python
62+
if __name__ == '__main__':
63+
invokehttp.run(call_api)
64+
```
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
headers:
2+
openai:
3+
Authorization: Bearer ENV_OPENAI_KEY
4+
5+
testcases:
6+
openai_chat:
7+
name: OpenAI Chat
8+
provider: openai
9+
url: http://127.0.0.1:8989/openai/chat/completions
10+
data: |
11+
{
12+
"max_tokens":4096,
13+
"messages":[
14+
{
15+
"content":"You are a coding assistant.",
16+
"role":"system"
17+
},
18+
{
19+
"content":"Reply with that exact sentence: Hello from the integration tests!",
20+
"role":"user"
21+
}
22+
],
23+
"model":"gpt-4o-mini",
24+
"stream":true,
25+
"temperature":0
26+
}
27+
likes: |
28+
Hello from the integration tests!
29+
30+
openai_fim:
31+
name: OpenAI FIM
32+
provider: openai
33+
url: http://127.0.0.1:8989/openai/chat/completions
34+
data: |
35+
{
36+
"messages": [
37+
{
38+
"role": "user",
39+
"content": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n sum += i;\n }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION> total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# {\"messages\":[{\"role\":\"user\",\"content\":\"You are a HOLE FILLER. You are provided with a file containing holes, formatted as \\'{{HOLE_NAME}}\\'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\nfunction sum_evens(lim) {\\\\n var sum = 0;\\\\n for (var i = 0; i < lim; ++i) {\\\\n {{FILL_HERE}}\\\\n }\\\\n return sum;\\\\n}\\\\n</QUERY>\\\\n\\\\nTASK: Fill the {{FILL_HERE}} hole.\\\\n\\\\n## CORRECT COMPLETION\\\\n\\\\n<COMPLETION>if (i % 2 === 0) {\\\\n sum += i;\\\\n }</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\ndef sum_list(lst):\\\\n total = 0\\\\n for x in lst:\\\\n {{FILL_HERE}}\\\\n return total\\\\n\\\\nprint sum_list([1, 2, 3])\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION> total += x</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\\\\n\\\\n// sum :: Tree Int -> Int\\\\n// sum (Node lft rgt) = sum lft + sum rgt\\\\n// sum (Leaf val) = val\\\\n\\\\n// convert to TypeScript:\\\\n{{FILL_HERE}}\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>type Tree<T>\\\\n = {$:\\\\\"Node\\\\\", lft: Tree<T>, rgt: Tree<T>}\\\\n | {$:\\\\\"Leaf\\\\\", val: T};\\\\n\\\\nfunction sum(tree: Tree<number>): number {\\\\n switch (tree.$) {\\\\n case \\\\\"Node\\\\\":\\\\n return sum(tree.lft) + sum(tree.rgt);\\\\n case \\\\\"Leaf\\\\\":\\\\n return tree.val;\\\\n }\\\\n}</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nThe 5th {{FILL_HERE}} is Jupiter.\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>planet from the Sun</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nfunction hypothenuse(a, b) {\\\\n return Math.sqrt({{FILL_HERE}}b ** 2);\\\\n}\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>a ** 2 + </COMPLETION>\\\\n\\\\n<QUERY>\\\\n\\\\n# codegate/test.py\\\\nimport invokehttp\\\\nimport requests\\\\n\\\\nkey = \\\\\"mysecret-key\\\\\"\\\\n\\\\ndef call_api():\\\\n {{FILL_HERE}}\\\\n\\\\n\\\\n\\\\n\\\\ndata = {\\'key1\\': \\'test1\\', \\'key2\\': \\'test2\\'}\\\\nresponse = call_api(\\'http://localhost:8080\\', method=\\'post\\', data=\\'data\\')\\\\n</QUERY>\\\\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\\\\n<COMPLETION>\"}],\"model\":\"gpt-4o-mini\",\"max_tokens\":4096,\"temperature\":0,\"stream\":true,\"stop\":[\"</COMPLETION>\",\"/src/\",\"#- coding: utf-8\",\"```\"]}\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n {{FILL_HERE}}\n\n\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
40+
}
41+
],
42+
"model": "gpt-4o-mini",
43+
"max_tokens": 4096,
44+
"temperature": 0,
45+
"stream": true,
46+
"stop": [
47+
"</COMPLETION>",
48+
"/src/",
49+
"#- coding: utf-8",
50+
"```"
51+
]
52+
}
53+
likes: |
54+
<COMPLETION> response = requests.post('http://localhost:8080', json=data, headers={'Authorization': f'Bearer {key}'})
55+

0 commit comments

Comments
 (0)