Skip to content

Commit 7d2b7b5

Browse files
authored
Merge pull request #1 from lambda-feedback/auto-docs-for-tests
Added the ability to write tests in JSON and run them automatically. This is intended to make it easier to auto-generate documentation from the evaluation_function unit tests.
2 parents cd0f01e + 169c51c commit 7d2b7b5

File tree

6 files changed

+163
-9
lines changed

6 files changed

+163
-9
lines changed

.DS_Store

6 KB
Binary file not shown.

docs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test_output.json

eval_tests.json

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
[
2+
{
3+
"title": "Basic expression equality and inequality",
4+
"masterContent": "Demonstrates trivial comparisons",
5+
"parts": [
6+
{
7+
"content": "The response and answer are exactly the same, so the response should be considered correct.",
8+
"responseAreas": [
9+
{
10+
"preResponseText": "",
11+
"answer": "A & B",
12+
"params": {},
13+
"tests": [
14+
{
15+
"description": "Most basic possible case",
16+
"response": "A & B",
17+
"expectedResult": {
18+
"is_correct": true,
19+
"response_latex": "A \\cdot B"
20+
}
21+
}
22+
]
23+
},
24+
{
25+
"preResponseText": "Multi-character variable names are supported.",
26+
"answer": "A & Test",
27+
"params": {},
28+
"tests": [
29+
{
30+
"description": "Works with variable names of any length",
31+
"response": "A & Test",
32+
"expectedResult": {
33+
"is_correct": true,
34+
"response_latex": "A \\cdot \\mathrm{Test}"
35+
}
36+
}
37+
]
38+
}
39+
]
40+
},
41+
{
42+
"content": "",
43+
"responseAreas": [
44+
{
45+
"preResponseText": "Transposition of variables:",
46+
"answer": "A & B",
47+
"params": {},
48+
"tests": [
49+
{
50+
"description": "Tests transposed variables are correct",
51+
"response": "B & A",
52+
"expectedResult": {
53+
"is_correct": true
54+
}
55+
}
56+
]
57+
},
58+
{
59+
"preResponseText": "Trivially incorrect response:",
60+
"answer": "A & B",
61+
"params": {},
62+
"tests": [
63+
{
64+
"description": "Incorrect results marked as false",
65+
"response": "A | B",
66+
"expectedResult": {
67+
"is_correct": false
68+
}
69+
}
70+
]
71+
}
72+
]
73+
}
74+
]
75+
}
76+
]

evaluation_function/evaluation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def evaluation_function(
1414
response: Any,
1515
answer: Any,
1616
params: Params,
17-
) -> dict:
17+
) -> Result:
1818
"""
1919
Function used to evaluate a student response.
2020
---

evaluation_function/evaluation_test.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import unittest
22

33
from .evaluation import Params, evaluation_function
4+
from .json_tests import auto_test
45

6+
@auto_test("eval_tests.json", evaluation_function)
57
class TestEvaluationFunction(unittest.TestCase):
68
"""
79
TestCase Class used to test the algorithm.
@@ -21,15 +23,18 @@ class TestEvaluationFunction(unittest.TestCase):
2123
as it should.
2224
"""
2325

26+
# Tests
27+
2428
def test_returns_is_correct_true_ascii(self):
    """An identical response and answer is marked correct, with no feedback.

    Also checks that the multi-character variable name is rendered in
    LaTeX wrapped in a mathrm command.
    """
    response, answer, params = "A & Test", "A & Test", Params()

    result = evaluation_function(response, answer, params).to_dict()

    self.assertEqual(result.get("is_correct"), True)
    # Both backslashes are escaped: a lone "\m" is an invalid escape
    # sequence that newer Python versions warn about. The resulting string
    # value is unchanged.
    self.assertEqual(
        result.get("response_latex"),
        "A \\cdot \\mathrm{Test}",
    )
    self.assertFalse(result.get("feedback"))
32-
37+
3338
def test_returns_negative(self):
3439
response, answer, params = "A | B", "A & B", Params()
3540

@@ -38,7 +43,7 @@ def test_returns_negative(self):
3843
self.assertEqual(result.get("is_correct"), False)
3944
self.assertEqual(result.get("response_latex"), "A + B")
4045
self.assertTrue(result.get("feedback"))
41-
46+
4247
def test_syntax_error(self):
4348
response, answer, params = "A £ B", "A & B", Params()
4449

@@ -47,7 +52,7 @@ def test_syntax_error(self):
4752
self.fail("Exception not raised for unexpected token")
4853
except:
4954
pass
50-
55+
5156
def test_xor_identity(self):
5257
response, answer, params = "A ^ B", "A & ~B | ~A & B", Params()
5358

@@ -56,7 +61,7 @@ def test_xor_identity(self):
5661
self.assertEqual(result.get("is_correct"), True)
5762
self.assertEqual(result.get("response_latex"), "A \\oplus B")
5863
self.assertFalse(result.get("feedback"))
59-
64+
6065
def test_nand_or(self):
6166
response, answer, params = "A | B", "~(~A & ~B)", Params()
6267

@@ -65,7 +70,7 @@ def test_nand_or(self):
6570
self.assertEqual(result.get("is_correct"), True)
6671
self.assertEqual(result.get("response_latex"), "A + B")
6772
self.assertFalse(result.get("feedback"))
68-
73+
6974
def test_nand_or(self):
7075
response, answer, params = "A | B", "~(~A & ~B)", Params()
7176

@@ -81,9 +86,10 @@ def test_nor_nand(self):
8186
result = evaluation_function(response, answer, params).to_dict()
8287

8388
self.assertEqual(result.get("is_correct"), True)
84-
self.assertEqual(result.get("response_latex"), "\\overline{\\left( A \\cdot B \\right)}")
89+
self.assertEqual(result.get("response_latex"),
90+
"\\overline{\\left( A \\cdot B \\right)}")
8591
self.assertFalse(result.get("feedback"))
86-
92+
8793
def test_complex(self):
8894
response, answer, params = "A & B | B & C & (B | C)", "B & (A | C)", Params()
8995

evaluation_function/json_tests.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import json
2+
3+
class TestData:
    """One flattened test case: an input triple plus its expected result.

    Attributes are filled from a dict that carries the test's own fields
    ("response", "description", "expectedResult") together with the
    "answer" and "params" of the response area it belongs to.
    """

    def __init__(self, test_dict: dict):
        """Unpack a single test entry.

        Args:
            test_dict: Mapping with the keys "response", "answer", "params",
                "expectedResult" and "description". "expectedResult" must
                itself contain "is_correct".

        Raises:
            KeyError: If any of the keys listed above is missing.
        """
        self.response = test_dict["response"]
        self.answer = test_dict["answer"]
        self.params = test_dict["params"]
        expected_result = test_dict["expectedResult"]
        self.is_correct = expected_result["is_correct"]
        # The whole expected mapping (including "is_correct") is kept so that
        # compare() can check every field listed in it.
        self.results = expected_result
        self.desc = test_dict["description"]

    def evaluate(self, func):
        """Call ``func(response, answer, params)`` and return its result as is.

        The return value is not converted here; the caller is responsible
        for turning it into the dict that compare() expects.
        """
        return func(self.response, self.answer, self.params)

    def compare(self, eval_result: dict) -> tuple[bool, str]:
        """Check an evaluation result against the expected result.

        Args:
            eval_result: Result mapping; must contain "is_correct".

        Returns:
            ``(True, "")`` when every expected field matches, otherwise
            ``(False, message)`` describing the first mismatch found.

        Raises:
            KeyError: If ``eval_result`` has no "is_correct" entry.
        """
        eval_correct = eval_result["is_correct"]

        if eval_correct != self.is_correct:
            verdict = "correct" if eval_correct else "incorrect"
            # .get(): a result without feedback must still produce a readable
            # failure message instead of raising KeyError here.
            feedback = eval_result.get("feedback")
            return (
                False,
                f'response "{self.response}" with answer "{self.answer}" '
                f"was {verdict}: {feedback}\n"
                f"Test description: {self.desc}",
            )

        # Check any other fields listed in the expected result.
        if self.results is not None:
            for key, value in self.results.items():
                actual_result_val = eval_result.get(key)
                if actual_result_val is None:
                    return (False, f'No value returned for "{key}"')

                if actual_result_val != value:
                    return (
                        False,
                        f'expected {key} = "{value}", '
                        f'got {key} = "{actual_result_val}"\n'
                        f"Test description: {self.desc}",
                    )

        return (True, "")
40+
41+
42+
def get_tests_from_json(filename: str) -> list[TestData]:
    """Load a structured JSON test file and flatten it into a list of tests.

    The file is expected to hold a list of questions, each with "parts",
    each part with "responseAreas", and each response area with "answer",
    "params" and a list of "tests". Every test inherits the answer and
    params of the response area that contains it.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        One TestData per test entry, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an expected key is missing from the structure.
    """
    # Explicit UTF-8 so non-ASCII test data does not depend on the
    # platform's default encoding.
    with open(filename, "r", encoding="utf-8") as test_file:
        questions = json.load(test_file)

    out = []
    # Convert the structured test data into a flat list of tests
    for question in questions:
        for part in question["parts"]:
            for response_area in part["responseAreas"]:
                params = response_area["params"]
                answer = response_area["answer"]
                for test in response_area["tests"]:
                    # Build a merged copy rather than mutating the parsed
                    # entry; the response area's values take precedence.
                    merged = {**test, "answer": answer, "params": params}
                    out.append(TestData(merged))

    return out
60+
61+
def auto_test(path, func):
    """Class decorator factory that adds a JSON-driven ``test_auto`` method.

    Args:
        path: Path of the JSON test file, read each time ``test_auto`` runs.
        func: Function called as ``func(response, answer, params)``; its
            return value must provide ``to_dict()``.

    Returns:
        A decorator that sets ``test_auto`` on the class it receives and
        returns that same class. The class is expected to provide
        ``assertTrue`` and ``subTest`` (as unittest.TestCase does).
    """
    def _auto_test(orig_class):
        def test_auto(self):
            tests = get_tests_from_json(path)
            for index, test in enumerate(tests):
                # One sub-test per JSON case, so a failing case is reported
                # on its own and does not stop the remaining cases.
                with self.subTest(index=index, description=test.desc):
                    results = test.evaluate(func)
                    self.assertTrue(*test.compare(results.to_dict()))

        orig_class.test_auto = test_auto  # Add the test_auto function to the class
        return orig_class
    return _auto_test

0 commit comments

Comments
 (0)