Skip to content

Commit dc1002c

Browse files
Merge pull request #148 from lambda-feedback/tr126-proof-of-concept-error-identification
Tr126 proof of concept error identification
2 parents 7b98fe9 + 8d79344 commit dc1002c

7 files changed

+176
-106
lines changed

app/criteria_graph_utilities.py

Lines changed: 107 additions & 83 deletions
Large diffs are not rendered by default.

app/evaluation_response_utilities.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ def __init__(self):
44
self.latex = None
55
self._feedback = [] # A list that will hold all feedback items
66
self._feedback_tags = {} # A dictionary that holds a list with indices to all feedback items with the same tag
7+
self._criteria_graphs = {}
78
self.latex = ""
89
self.simplified = ""
910

@@ -24,13 +25,17 @@ def add_feedback(self, feedback_item):
2425
raise TypeError("Feedback must be on the form (tag, feedback).")
2526
self._feedback_tags
2627

28+
def add_criteria_graph(self, name, graph):
    """Record the JSON form of a criteria graph under ``name``.

    The value stored is whatever ``graph.json()`` returns; an entry that
    already exists under the same name is overwritten.
    """
    serialised_graph = graph.json()
    self._criteria_graphs[name] = serialised_graph
30+
2731
def _serialise_feedback(self) -> str:
2832
return "<br>".join(x[1] if (isinstance(x, tuple) and len(x[1].strip())) > 0 else x for x in self._feedback)
2933

3034
def serialise(self, include_test_data=False) -> dict:
3135
out = dict(is_correct=self.is_correct, feedback=self._serialise_feedback())
36+
out.update(dict(tags=list(self._feedback_tags.keys())))
3237
if include_test_data is True:
33-
out.update(dict(tags=self._feedback_tags))
38+
out.update(dict(criteria_graphs=self._criteria_graphs))
3439
if self.latex is not None:
3540
out.update(dict(response_latex=self.latex))
3641
if self.simplified is not None:

app/example_tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def test_checking_the_value_of_a_physical_quantity(self, response, answer, respo
121121
result = evaluation_function(response, answer, params, include_test_data=True)
122122
assert preview["latex"] == response_latex
123123
assert result["response_latex"] == response_latex
124-
assert tags == {tag for (tag, feedback) in result["tags"].items() if len(feedback) > 0}
124+
assert tags == set(result["tags"])
125125
assert result["is_correct"] == value
126126

127127
@pytest.mark.parametrize(

app/quantity_comparison_evaluation_tests.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,15 +178,15 @@ def test_convert_units(self, ans, res):
178178
def test_si_units_check_tag(self, ans, res, tag):
179179
params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
180180
result = evaluation_function(res, ans, params, include_test_data=True)
181-
assert tag in result["tags"].keys()
181+
assert tag in result["tags"]
182182
assert result["is_correct"] is False
183183

184184
def test_si_units_parse_error(self):
    """A response ending in a dangling ``^`` must be tagged PARSE_EXCEPTION and marked incorrect."""
    params = {
        "strict_syntax": False,
        "physical_quantity": True,
        "units_string": "SI",
        "strictness": "strict",
    }
    answer = "-10.5 kg m/s^2"
    malformed_response = "-10.5 kg m/s^"
    result = evaluation_function(malformed_response, answer, params, include_test_data=True)
    # "tags" is a list of tag names, so membership is tested on it directly.
    assert "PARSE_EXCEPTION" in result["tags"]
    assert result["is_correct"] is False
191191

192192
@pytest.mark.parametrize(
@@ -210,7 +210,7 @@ def test_demo_si_units_demo_a(self, res, is_correct, tag):
210210
ans = "-10.5 kilogram metre/second^2"
211211
params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
212212
result = evaluation_function(res, ans, params, include_test_data=True)
213-
assert tag in result["tags"].keys()
213+
assert tag in result["tags"]
214214
assert result["is_correct"] is is_correct
215215

216216
@pytest.mark.parametrize(
@@ -226,7 +226,7 @@ def test_demo_si_units_demo_b(self, res, ans, is_correct, tag, latex):
226226
params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
227227
result = evaluation_function(res, ans, params, include_test_data=True)
228228
assert result["response_latex"] == latex
229-
assert tag in result["tags"].keys()
229+
assert tag in result["tags"]
230230
assert result["is_correct"] == is_correct
231231

232232
if __name__ == "__main__":

app/slr_parsing_utilities.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,10 @@ def traverse_postfix(expr_node, action):
235235

236236

237237
def traverse_infix(expr_node, action):
    """Lay out a node's children in infix order.

    Returns a list of ``(flag, value)`` pairs: every child appears as
    ``(False, child)`` and, between each pair of consecutive children,
    ``(True, action(expr_node))`` is inserted. ``action`` is therefore
    called once per gap. Raises ``IndexError`` when the node has no children.
    """
    children = expr_node.children
    sequence = []
    for child in children[:-1]:
        sequence.append((False, child))
        sequence.append((True, action(expr_node)))
    sequence.append((False, children[-1]))
    return sequence
239242

240243

241244
def traverse_group(expr_node, action):

app/symbolic_comparison_evaluation.py

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from sympy.parsing.sympy_parser import T as parser_transformations
2-
from sympy import Abs, Equality, latex, pi, Symbol, Add, Pow
2+
from sympy import Abs, Equality, latex, pi, Symbol, Add, Pow, Mul
33
from sympy.printing.latex import LatexPrinter
44
from copy import deepcopy
55

@@ -155,10 +155,10 @@ def evaluation_node_internal(unused_input):
155155
rhs = criterion.children[1].content_string()
156156
END = CriteriaGraph.END
157157
graph.add_node(END)
158-
graph.add_evaluation_node(label, summary=label, details="Checks if "+str(lhs)+"="+str(rhs)+".", evaluate=evaluation_node_internal)
159-
graph.attach(label, label+"_TRUE", summary=str(lhs)+"="+str(rhs), details=str(lhs)+" is equal to "+str(rhs)+".")
158+
graph.add_evaluation_node(label, summary=label, details="Checks if "+lhs+"="+rhs+".", evaluate=evaluation_node_internal)
159+
graph.attach(label, label+"_TRUE", summary=lhs+"="+rhs, details=lhs+" is equal to "+rhs+".")
160160
graph.attach(label+"_TRUE", END.label)
161-
graph.attach(label, label+"_FALSE", summary=str(lhs)+"=\="+str(rhs), details=str(lhs)+" is not equal to"+str(rhs)+".")
161+
graph.attach(label, label+"_FALSE", summary=lhs+"=\\="+rhs, details=lhs+" is not equal to"+rhs+".")
162162
graph.attach(label+"_FALSE", END.label)
163163
return graph
164164

@@ -194,6 +194,15 @@ def addition_to_subtraction(node, k):
194194
variations = replace_node_variations(expression, Add, addition_to_subtraction)
195195
return variations
196196

197+
def one_swap_addition_and_multiplication(expression):
    """Collect variations of ``expression`` where one ``+`` and ``*`` are exchanged.

    Two passes are made with ``replace_node_variations``: one over ``Add``
    nodes (a sum of two neighbouring terms becomes their product) and one over
    ``Mul`` nodes (a product of two neighbouring factors becomes their sum).
    The results of both passes are concatenated and returned.

    NOTE(review): the callbacks assume ``replace_node_variations`` calls them
    with a matching node and an index ``k`` into ``node.args`` -- confirm
    against that helper.
    """
    def addition_to_multiplication(node, k):
        # Drop the two neighbouring terms from the sum and add their product instead.
        return node - node.args[k-1] - node.args[k] + node.args[k-1] * node.args[k]

    def multiplication_to_addition(node, k):
        # Divide out the two neighbouring factors and multiply by their sum instead.
        return node / (node.args[k-1] * node.args[k]) * (node.args[k-1] + node.args[k])

    variations = replace_node_variations(expression, Add, addition_to_multiplication)
    # Mul nodes need the product-to-sum rewrite; the sum-to-product formula
    # applied to a product a*b would yield 2*a*b - a - b.
    variations += replace_node_variations(expression, Mul, multiplication_to_addition)
    return variations
205+
197206
def one_exponent_flip(expression):
198207
def exponent_flip(node, k):
199208
return node**(-1)
@@ -232,10 +241,10 @@ def expression_check(unused_input):
232241
graph = CriteriaGraph(label)
233242
END = CriteriaGraph.END
234243
graph.add_node(END)
235-
graph.add_evaluation_node(label, summary=label, details="Checks if "+str(expression)+" where "+str(subs)+".", evaluate=create_expression_check(expression))
236-
graph.attach(label, label+"_TRUE", summary=str(expression)+" where "+str(subs), details=str(expression)+" where "+str(subs)+"is true.")
244+
graph.add_evaluation_node(label, summary=label, details="Checks if "+expression.content_string()+" where "+", ".join([s.content_string() for s in subs])+".", evaluate=create_expression_check(expression))
245+
graph.attach(label, label+"_TRUE", summary=expression.content_string()+" where "+", ".join([s.content_string() for s in subs]), details=expression.content_string()+" where "+", ".join([s.content_string() for s in subs])+"is true.")
237246
graph.attach(label+"_TRUE", END.label)
238-
graph.attach(label, label+"_FALSE", summary="not "+str(expression), details=str(expression)+" is not true with"+str(subs)+".")
247+
graph.attach(label, label+"_FALSE", summary="not "+expression.content_string(), details=expression.content_string()+" is not true when "+", ".join([s.content_string() for s in subs])+".")
239248

240249
reserved_expressions = list(parameters_dict["reserved_expressions"].items())
241250
response = parameters_dict["reserved_expressions"]["response"]
@@ -252,12 +261,17 @@ def expression_check(unused_input):
252261
variation_groups = {
253262
"ONE_ADDITION_TO_SUBTRACTION": {
254263
"variations": one_addition_to_subtraction(expression_to_vary),
255-
"summary": lambda expression, variations: str(expression)+" is true if one addition is changed to a subtraction or vice versa.",
264+
"summary": lambda expression, variations: criterion.children[0].content_string()+" if one addition is changed to a subtraction or vice versa.",
256265
"details": lambda expression, variations: "The following expressions are checked: "+", ".join([str(e) for e in variations]),
257266
},
258267
"ONE_EXPONENT_FLIP": {
259268
"variations": one_exponent_flip(expression_to_vary),
260-
"summary": lambda expression, variations: str(expression)+" is true if one exponent has its sign changed.",
269+
"summary": lambda expression, variations: criterion.children[0].content_string()+" is true if one exponent has its sign changed.",
270+
"details": lambda expression, variations: "The following expressions are checked: "+", ".join([str(e) for e in variations]),
271+
},
272+
"ONE_SWAP_ADDITION_AND_MULTIPLICATION": {
273+
"variations": one_swap_addition_and_multiplication(expression_to_vary),
274+
"summary": lambda expression, variations: criterion.children[0].content_string()+" is true if one addition is replaced with a multiplication or vice versa.",
261275
"details": lambda expression, variations: "The following expressions are checked: "+", ".join([str(e) for e in variations]),
262276
}
263277
}
@@ -292,8 +306,9 @@ def get_candidates(unused_input):
292306
graph.attach(
293307
label+"_"+group_label,
294308
label+"_GET_CANDIDATES_"+group_label,
295-
summary="Get candidate responses that satisfy "+str(expression),
296-
details="Get candidate responses that satisfy "+str(expression), evaluate=get_candidates
309+
summary="Get candidate responses that satisfy "+expression.content_string(),
310+
details="Get candidate responses that satisfy "+expression.content_string(),
311+
evaluate=get_candidates
297312
)
298313

299314
for (value, expressions) in values_and_expressions.items():
@@ -303,7 +318,7 @@ def get_candidates(unused_input):
303318
graph.attach(
304319
label+"_GET_CANDIDATES_"+group_label,
305320
"response candidates "+expressions_string,
306-
summary="Response candidates: "+expressions_string,
321+
summary="response = "+str(value),
307322
details="Response candidates: "+expressions_string
308323
)
309324
graph.attach(
@@ -355,6 +370,9 @@ def create_criteria_graphs(criteria, params_dict):
355370
label = criterion.label.strip()
356371
graph_template = graph_templates.get(label, criterion_eval_node)
357372
graph = graph_template(criterion, params_dict)
373+
for evaluation in graph.evaluations.values():
374+
if evaluation.label in params_dict.get("disabled_evaluation_nodes", set()):
375+
evaluation.replacement = CriteriaGraph.END
358376
criteria_graphs.update({criterion.content_string(): graph})
359377
return criteria_graphs
360378

@@ -503,16 +521,15 @@ def symbolic_comparison(response, answer, params, eval_response) -> dict:
503521
"reference_criteria_strings": reference_criteria_strings,
504522
"symbolic_comparison_criteria": symbolic_comparison_criteria,
505523
"eval_response": eval_response,
524+
"disabled_evaluation_nodes": params.get("disabled_evaluation_nodes", set())
506525
}
507526
criteria_graphs = create_criteria_graphs(criteria_parsed, parameters_dict)
508527
criteria_feedback = set()
509-
#for criterion in criteria_parsed:
510528
for (criterion_identifier, graph) in criteria_graphs.items():
511529
main_criteria = criterion_identifier+"_TRUE"
512530
criteria_feedback = criteria_feedback.union(graph.generate_feedback(response, main_criteria))
513-
#criteria_feedback = criteria_feedback.union(criterion_eval_node(criterion, parameters_dict).generate_feedback(response, main_criteria))
514-
#is_correct = is_correct and check_criterion(criterion, parameters_dict)
515531
is_correct = is_correct and main_criteria in criteria_feedback
532+
eval_response.add_criteria_graph(criterion_identifier, graph)
516533
result = main_criteria in criteria_feedback
517534
for item in criteria_feedback:
518535
eval_response.add_feedback((item, ""))

app/symbolic_comparison_evaluation_tests.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ def test_no_reserved_keywords_in_old_format_input_symbol_alternatives(self):
11041104
("3", "x+1", "response=answer where x=2", True, ["response=answer where x=2_TRUE"], {}),
11051105
("1", "x+1", "response=answer where x=2", False, ["response=answer where x=2_ONE_ADDITION_TO_SUBTRACTION", "response candidates x - 1"], {}),
11061106
("5/3", "x/y+1", "response=answer where x=2; y=3", True, ["response=answer where x=2; y=3_TRUE"], {}),
1107-
("15", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_EXPONENT_FLIP"], {}), #NOTE: Sympy reporesents input as (x+y)/y so flipping the exponent gives (x+y)*y instead of x*y+1
1107+
("15", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_EXPONENT_FLIP"], {}), #NOTE: Sympy represents input as (x+y)/y so flipping the exponent gives (x+y)*y instead of x*y+1
11081108
("-1/3", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_ADDITION_TO_SUBTRACTION"], {}),
11091109
("13", "x+y*z-1", "response=answer where x=2; y=3; z=4", True, [], {}),
11101110
]
@@ -1121,6 +1121,27 @@ def test_criteria_based_comparison(self, response, answer, criteria, value, feed
11211121
for feedback_tag in feedback_tags:
11221122
assert feedback_tag in result["tags"]
11231123

1124+
@pytest.mark.parametrize(
    "response, answer, criteria, value, disabled_evaluation_nodes, expected_feedback_tags, disabled_feedback_tags, additional_params",
    [
        ("8", "x+y*z**2-1", "response=answer where x=4; y=3; z=2", False, ["response=answer where x=4; y=3; z=2_GET_CANDIDATES_ONE_SWAP_ADDITION_AND_MULTIPLICATION"], ["response=answer where x=4; y=3; z=2_ONE_SWAP_ADDITION_AND_MULTIPLICATION"], ["response candidates -x + y*z**2"], {}),
    ]
)
def test_disabled_evaluation_nodes(self, response, answer, criteria, value, disabled_evaluation_nodes, expected_feedback_tags, disabled_feedback_tags, additional_params):
    """Check which feedback tags are reported when evaluation nodes are disabled.

    The disabled node labels are passed through the ``disabled_evaluation_nodes``
    parameter; the expected tags must be reported and the disabled ones must not.
    """
    # Entries in additional_params take precedence over the base parameters.
    params = {
        "strict_syntax": False,
        "elementary_functions": True,
        "criteria": criteria,
        "disabled_evaluation_nodes": disabled_evaluation_nodes,
        **additional_params,
    }
    result = evaluation_function(response, answer, params, include_test_data=True)
    assert result["is_correct"] is value
    for expected_tag in expected_feedback_tags:
        assert expected_tag in result["tags"]
    for suppressed_tag in disabled_feedback_tags:
        assert suppressed_tag not in result["tags"]
1144+
11241145
@pytest.mark.parametrize(
11251146
"response, answer, value",
11261147
[

0 commit comments

Comments
 (0)