Skip to content

Commit 6bb929f

Browse files
committed
Added YAML test format, further test abstractions
1 parent d68037d commit 6bb929f

File tree

7 files changed

+392
-201
lines changed

7 files changed

+392
-201
lines changed

eval_tests.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
{
1515
"description": "Most basic possible case",
1616
"response": "A & B",
17-
"expectedResult": {
17+
"expected_result": {
1818
"is_correct": true,
1919
"response_latex": "A \\cdot B"
2020
}
@@ -29,7 +29,7 @@
2929
{
3030
"description": "Works with variable names of any length",
3131
"response": "A & Test",
32-
"expectedResult": {
32+
"expected_result": {
3333
"is_correct": true,
3434
"response_latex": "A \\cdot \\mathrm{Test}"
3535
}
@@ -49,7 +49,7 @@
4949
{
5050
"description": "Tests transposed variables are correct",
5151
"response": "B & A",
52-
"expectedResult": {
52+
"expected_result": {
5353
"is_correct": true
5454
}
5555
}
@@ -63,7 +63,7 @@
6363
{
6464
"description": "Incorrect results marked as false",
6565
"response": "A | B",
66-
"expectedResult": {
66+
"expected_result": {
6767
"is_correct": false
6868
}
6969
}

eval_tests.yaml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# YAML supports comments: one reason to prefer it over JSON, which doesn't.
2+
---
3+
title: Trivial comparisons
4+
tests:
5+
- description: The response and answer are exactly the same, so the response should be considered correct.
6+
# Parameters can be given, but this field can be removed if none are needed
7+
params: {}
8+
answer: "A & B"
9+
response: "A & B"
10+
expected_result:
11+
is_correct: yes
12+
# Ensure that the latex returned (for the preview) is correct
13+
response_latex: "A \\cdot B"
14+
15+
- description: Multi-character variable names are also supported
16+
answer: "A & Test"
17+
response: "A & Test"
18+
expected_result:
19+
is_correct: yes
20+
response_latex: "A \\cdot \\mathrm{Test}"
21+
# Tests can be divided into sections using '---'
22+
---
23+
title: Trivial comparisons, but not identical
24+
tests:
25+
- description: Variables can appear in any order.
26+
answer: "A & B"
27+
response: "B & A"
28+
expected_result:
29+
is_correct: yes
30+
# Any other fields returned by the evaluation function can be tested too, but this is optional.
31+
32+
- description: > # Multi-line strings are possible for readability
33+
The wrong operator is used, so this is incorrect as
34+
the two expressions have different truth tables.
35+
answer: "A & B"
36+
response: "A | B"
37+
expected_result:
38+
is_correct: no
39+
---
40+
# This illustrates how sub-tests can be used to share the same answer and parameters
41+
# for multiple tests.
42+
title: More complex comparisons
43+
tests:
44+
- description: XOR can be implemented using NAND or NOR
45+
answer: "A ^ B"
46+
sub_tests:
47+
- description: "Using NAND:"
48+
response: "~(~(A & ~(A & B)) & ~(B & ~(A & B)))"
49+
expected_result:
50+
is_correct: yes
51+
- description: "Using NOR:"
52+
response: "~(~(~A | ~B) | ~(A | B))"
53+
expected_result:
54+
is_correct: yes
55+

evaluation_function/auto_tests.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import json
2+
import yaml
3+
from typing import Union
4+
5+
class TestFile:
    """An abstraction over a test file, which may be in one of several different formats.

    Currently, JSON (``.json``) and YAML (``.yaml`` / ``.yml``) are supported.
    The format is selected from the file extension, compared case-insensitively.
    """

    def __init__(self, path: str) -> None:
        """Read the file at ``path`` and parse it into test groups.

        Args:
            path: Path of the test file, passed directly to ``open``.

        Raises:
            Exception: If the file cannot be read, cannot be parsed, lacks a
                required key, or has an unsupported extension. The original
                error (if any) is attached as ``__cause__``.
        """
        # Each entry is {"title": <str>, "tests": [<SingleTest>, ...]}, in file order.
        self.groups = []

        # Attempt to read the given file. Raise with a readable message if
        # this is not possible.
        try:
            with open(path, "r") as test_file:
                file_content = test_file.read()
        except IOError as e:
            raise Exception(f'Failed to open test file: "{e}"') from e

        # The text after the last "." determines which format is used.
        # The comparison ignores case, but the error message reports the
        # extension exactly as it was given.
        extension = path.split(".")[-1]
        file_format = extension.lower()
        if file_format == "json":
            self._load_json(file_content)
        elif file_format in ("yaml", "yml"):
            self._load_yaml(file_content)
        else:
            raise Exception(f'"{extension}" files are not supported as a test format.')

    def _load_json(self, file_content: str) -> None:
        """Parse JSON test data and append one group per question to ``self.groups``.

        The top level is iterated as a sequence of questions. Each question
        supplies "title" and "parts"; each part supplies "responseAreas";
        each response area supplies "params", "answer" and "tests".
        """
        try:
            questions = json.loads(file_content)

            for question in questions:
                out = []
                title = question["title"]
                for part in question["parts"]:
                    for response_area in part["responseAreas"]:
                        params = response_area["params"]
                        answer = response_area["answer"]
                        for test in response_area["tests"]:
                            # Every test in a response area shares that
                            # area's answer and params.
                            test.update({"answer": answer, "params": params})
                            out.append(SingleTest(test))
                self.groups.append({"title": title, "tests": out})
        except KeyError as e:
            raise Exception(
                f'The key "{e.args[0]}" doesn\'t exist, or is in the wrong place.'
            ) from e
        except json.JSONDecodeError as e:
            raise Exception(f'Error parsing JSON: "{e}"') from e

    def _load_yaml(self, file_content: str) -> None:
        """Parse YAML test data and append one group per document to ``self.groups``.

        Tests are organised in groups of separate YAML documents (separated
        by "---"). A test may carry "sub_tests", in which case each sub-test
        inherits the parent's "answer" and "params" and the parent itself is
        not run.
        """
        try:
            # safe_load_all parses lazily, so syntax errors surface while
            # iterating; the loop therefore has to live inside the try.
            for test_group in yaml.safe_load_all(file_content):
                # An empty document (e.g. a trailing "---") loads as None.
                if test_group is None:
                    continue

                tests = []
                title = test_group.get("title", "")
                # "tests:" with no value loads as None; treat it as empty.
                for test in test_group.get("tests") or []:
                    # Add an empty params field if none was provided.
                    if test.get("params") is None:
                        test["params"] = {}

                    # Does this test have sub-tests?
                    sub_tests = test.get("sub_tests")
                    if sub_tests is not None:
                        params = test["params"]
                        answer = test["answer"]

                        for sub_test in sub_tests:
                            sub_test["params"] = params
                            sub_test["answer"] = answer
                            tests.append(SingleTest(sub_test))
                    else:
                        tests.append(SingleTest(test))

                self.groups.append({"title": title, "tests": tests})
        except KeyError as e:
            # Mirrors the JSON loader: e.g. a test with sub_tests but no answer.
            raise Exception(
                f'The key "{e.args[0]}" doesn\'t exist, or is in the wrong place.'
            ) from e
        except yaml.YAMLError as e:
            raise Exception(f'Error parsing YAML: {e}') from e
75+
76+
class SingleTest:
    """A single test case: a response/answer pair and the result expected for it."""

    def __init__(self, test_dict: dict):
        """Build a test from its dictionary form.

        Args:
            test_dict: Reads the keys "response", "answer", "params",
                "description" (all optional) and "expected_result" (required,
                must be non-empty).

        Raises:
            Exception: If "expected_result" is missing or empty.
        """
        self.response = test_dict.get("response", "")
        self.answer = test_dict.get("answer", "")
        self.params = test_dict.get("params", {})
        expected_result = test_dict.get("expected_result")
        if not expected_result:
            raise Exception("No expected result given for test")
        self.is_correct = expected_result.get("is_correct")
        # Every expected field (including "is_correct") is checked by compare().
        self.results = expected_result
        self.desc = test_dict.get("description", "")

    def evaluate(self, func) -> dict:
        """Call ``func(response, answer, params)`` and return whatever it returns."""
        return func(self.response, self.answer, self.params)

    def compare(self, eval_result: dict) -> tuple[bool, str]:
        """Check an evaluation result against the expected result.

        Args:
            eval_result: The evaluation function's result in dictionary form.

        Returns:
            ``(True, "")`` when every expected field matches, otherwise
            ``(False, message)`` describing the first mismatch found.
        """
        # A result with no verdict is reported as a failed test, not a KeyError.
        if "is_correct" not in eval_result:
            return (False, "No value returned for \"is_correct\"")
        eval_correct = eval_result["is_correct"]

        if eval_correct != self.is_correct:
            verdict = "correct" if eval_correct else "incorrect"
            # Feedback is optional; its absence must not mask the real failure.
            feedback = eval_result.get("feedback", "")
            return (
                False,
                f"response \"{self.response}\" with answer \"{self.answer}\" was {verdict}: {feedback}\nTest description: {self.desc}"
            )

        # Are there any other fields in the eval function result that need to be checked?
        if self.results is not None:
            # Check each one in turn
            for key, value in self.results.items():
                actual_result_val = eval_result.get(key)
                if actual_result_val is None:
                    return (False, f"No value returned for \"{key}\"")

                if actual_result_val != value:
                    return (
                        False,
                        f"expected {key} = \"{value}\", got {key} = \"{actual_result_val}\"\nTest description: {self.desc}"
                    )

        return (True, "")
115+
116+
117+
def auto_test(path, func):
    """Return a class decorator that adds a data-driven ``test_auto`` method.

    The added method loads the test file at ``path`` with ``TestFile`` each
    time it runs, evaluates every test in every group with ``func``, and
    asserts on the comparison outcome. The value returned by ``func`` must
    provide a ``to_dict()`` method.

    Args:
        path: Path to the data file, relative to the eval function root.
        func: The function to test. Should usually be ``evaluation_function``.

    Returns:
        A decorator that sets ``test_auto`` on the class it receives and
        returns that same class.
    """
    def _attach(orig_class):
        def test_auto(self):
            # Loading can fail for several reasons; TestFile raises with a
            # suitable message, which is reported as a test failure.
            try:
                tests = TestFile(path)
            except Exception as e:
                self.fail(e)

            # Loaded successfully: walk every case of every group, in file order.
            cases = (case for group in tests.groups for case in group["tests"])
            for case in cases:
                outcome = case.evaluate(func)
                passed, message = case.compare(outcome.to_dict())
                self.assertTrue(passed, message)

        # Expose the generated method on the decorated class under a name
        # beginning with "test".
        setattr(orig_class, "test_auto", test_auto)
        return orig_class

    return _attach

evaluation_function/evaluation_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import unittest
22

33
from .evaluation import Params, evaluation_function
4-
from .json_tests import auto_test
4+
from .auto_tests import auto_test
55

6-
@auto_test("eval_tests.json", evaluation_function)
6+
@auto_test("eval_tests.yaml", evaluation_function)
77
class TestEvaluationFunction(unittest.TestCase):
88
"""
99
TestCase Class used to test the algorithm.

evaluation_function/json_tests.py

Lines changed: 0 additions & 71 deletions
This file was deleted.

0 commit comments

Comments
 (0)