Merge pull request #148 from lambda-feedback/tr126-proof-of-concept-error-identification

KarlLundengaard · web-flow · commit dc1002ca69a3 · 2024-04-18T10:34:20.000Z
Tr126 proof of concept error identification
diff --git a/app/criteria_graph_utilities.py b/app/criteria_graph_utilities.py
diff --git a/app/evaluation_response_utilities.py b/app/evaluation_response_utilities.py
@@ -4,6 +4,7 @@ def __init__(self):
         self.latex = None
         self._feedback = [] # A list that will hold all feedback items
         self._feedback_tags = {}  # A dictionary that holds a list with indices to all feedback items with the same tag
+        self._criteria_graphs = {}
         self.latex = ""
         self.simplified = ""
 
@@ -24,13 +25,17 @@ def add_feedback(self, feedback_item):
             raise TypeError("Feedback must be on the form (tag, feedback).")
         self._feedback_tags
 
+    def add_criteria_graph(self, name, graph):  # keeps the graph's json() output, keyed by name
+        self._criteria_graphs[name] = graph.json()
+
     def _serialise_feedback(self) -> str:
         return "<br>".join(x[1] if (isinstance(x, tuple) and len(x[1].strip())) > 0 else x for x in self._feedback)
 
     def serialise(self, include_test_data=False) -> dict:
         out = dict(is_correct=self.is_correct, feedback=self._serialise_feedback())
+        out.update(dict(tags=list(self._feedback_tags.keys())))
         if include_test_data is True:
-            out.update(dict(tags=self._feedback_tags))
+            out.update(dict(criteria_graphs=self._criteria_graphs))
         if self.latex is not None:
             out.update(dict(response_latex=self.latex))
         if self.simplified is not None:
diff --git a/app/example_tests.py b/app/example_tests.py
@@ -121,7 +121,7 @@ def test_checking_the_value_of_a_physical_quantity(self, response, answer, respo
         result = evaluation_function(response, answer, params, include_test_data=True)
         assert preview["latex"] == response_latex
         assert result["response_latex"] == response_latex
-        assert tags == {tag for (tag, feedback) in result["tags"].items() if len(feedback) > 0}
+        assert tags == set(result["tags"])
         assert result["is_correct"] == value
 
     @pytest.mark.parametrize(
diff --git a/app/quantity_comparison_evaluation_tests.py b/app/quantity_comparison_evaluation_tests.py
@@ -178,15 +178,15 @@ def test_convert_units(self, ans, res):
     def test_si_units_check_tag(self, ans, res, tag):
         params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
         result = evaluation_function(res, ans, params, include_test_data=True)
-        assert tag in result["tags"].keys()
+        assert tag in result["tags"]
         assert result["is_correct"] is False
 
     def test_si_units_parse_error(self):
         ans = "-10.5 kg m/s^2"
         res = "-10.5 kg m/s^"
         params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
         result = evaluation_function(res, ans, params, include_test_data=True)
-        assert "PARSE_EXCEPTION" in result["tags"].keys()
+        assert "PARSE_EXCEPTION" in result["tags"]
         assert result["is_correct"] is False
 
     @pytest.mark.parametrize(
@@ -210,7 +210,7 @@ def test_demo_si_units_demo_a(self, res, is_correct, tag):
         ans = "-10.5 kilogram metre/second^2"
         params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
         result = evaluation_function(res, ans, params, include_test_data=True)
-        assert tag in result["tags"].keys()
+        assert tag in result["tags"]
         assert result["is_correct"] is is_correct
 
     @pytest.mark.parametrize(
@@ -226,7 +226,7 @@ def test_demo_si_units_demo_b(self, res, ans, is_correct, tag, latex):
         params = {"strict_syntax": False, "physical_quantity": True, "units_string": "SI", "strictness": "strict"}
         result = evaluation_function(res, ans, params, include_test_data=True)
         assert result["response_latex"] == latex
-        assert tag in result["tags"].keys()
+        assert tag in result["tags"]
         assert result["is_correct"] == is_correct
 
 if __name__ == "__main__":
diff --git a/app/slr_parsing_utilities.py b/app/slr_parsing_utilities.py
@@ -235,7 +235,10 @@ def traverse_postfix(expr_node, action):
 
 
 def traverse_infix(expr_node, action):
-    return [(False, expr_node.children[0])]+[(True, action(expr_node))]+[(False, expr_node.children[1])]
+    out = []  # alternates (False, child) and (True, action(expr_node)) for every child but the last
+    for child in expr_node.children[:-1]:
+        out.extend(((False, child), (True, action(expr_node))))
+    return [*out, (False, expr_node.children[-1])]
 
 
 def traverse_group(expr_node, action):
diff --git a/app/symbolic_comparison_evaluation.py b/app/symbolic_comparison_evaluation.py
@@ -1,5 +1,5 @@
 from sympy.parsing.sympy_parser import T as parser_transformations
-from sympy import Abs, Equality, latex, pi, Symbol, Add, Pow
+from sympy import Abs, Equality, latex, pi, Symbol, Add, Pow, Mul
 from sympy.printing.latex import LatexPrinter
 from copy import deepcopy
 
@@ -155,10 +155,10 @@ def evaluation_node_internal(unused_input):
     rhs = criterion.children[1].content_string()
     END = CriteriaGraph.END
     graph.add_node(END)
-    graph.add_evaluation_node(label, summary=label, details="Checks if "+str(lhs)+"="+str(rhs)+".", evaluate=evaluation_node_internal)
-    graph.attach(label, label+"_TRUE", summary=str(lhs)+"="+str(rhs), details=str(lhs)+" is equal to "+str(rhs)+".")
+    graph.add_evaluation_node(label, summary=label, details="Checks if "+lhs+"="+rhs+".", evaluate=evaluation_node_internal)
+    graph.attach(label, label+"_TRUE", summary=lhs+"="+rhs, details=lhs+" is equal to "+rhs+".")
     graph.attach(label+"_TRUE", END.label)
-    graph.attach(label, label+"_FALSE", summary=str(lhs)+"=\="+str(rhs), details=str(lhs)+" is not equal to"+str(rhs)+".")
+    graph.attach(label, label+"_FALSE", summary=lhs+"=\\="+rhs, details=lhs+" is not equal to "+rhs+".")  # details: "<lhs> is not equal to <rhs>."
     graph.attach(label+"_FALSE", END.label)
     return graph
 
@@ -194,6 +194,15 @@ def addition_to_subtraction(node, k):
     variations = replace_node_variations(expression, Add, addition_to_subtraction)
     return variations
 
+def one_swap_addition_and_multiplication(expression):  # variations of expression with one '+' turned into '*' or one '*' into '+'
+    def addition_to_multiplication(node, k):  # Add node: drop terms k-1 and k, add their product instead
+        return node - node.args[k-1] - node.args[k] + node.args[k-1] * node.args[k]
+    def multiplication_to_addition(node, k):  # Mul node: divide out factors k-1 and k, multiply by their sum instead
+        return node / (node.args[k-1] * node.args[k]) * (node.args[k-1] + node.args[k])
+    variations = replace_node_variations(expression, Add, addition_to_multiplication)
+    variations += replace_node_variations(expression, Mul, multiplication_to_addition)  # Mul nodes need the Mul-specific rewrite
+    return variations
+
 def one_exponent_flip(expression):
     def exponent_flip(node, k):
         return node**(-1)
@@ -232,10 +241,10 @@ def expression_check(unused_input):
     graph = CriteriaGraph(label)
     END = CriteriaGraph.END
     graph.add_node(END)
-    graph.add_evaluation_node(label, summary=label, details="Checks if "+str(expression)+" where "+str(subs)+".", evaluate=create_expression_check(expression))
-    graph.attach(label, label+"_TRUE", summary=str(expression)+" where "+str(subs), details=str(expression)+" where "+str(subs)+"is true.")
+    graph.add_evaluation_node(label, summary=label, details="Checks if "+expression.content_string()+" where "+", ".join([s.content_string() for s in subs])+".", evaluate=create_expression_check(expression))
+    graph.attach(label, label+"_TRUE", summary=expression.content_string()+" where "+", ".join([s.content_string() for s in subs]), details=expression.content_string()+" where "+", ".join([s.content_string() for s in subs])+" is true.")  # leading space keeps the last substitution apart from "is true."
     graph.attach(label+"_TRUE", END.label)
-    graph.attach(label, label+"_FALSE", summary="not "+str(expression), details=str(expression)+" is not true with"+str(subs)+".")
+    graph.attach(label, label+"_FALSE", summary="not "+expression.content_string(), details=expression.content_string()+" is not true when "+", ".join([s.content_string() for s in subs])+".")
 
     reserved_expressions = list(parameters_dict["reserved_expressions"].items())
     response = parameters_dict["reserved_expressions"]["response"]
@@ -252,12 +261,17 @@ def expression_check(unused_input):
         variation_groups = {
             "ONE_ADDITION_TO_SUBTRACTION": {
                 "variations": one_addition_to_subtraction(expression_to_vary),
-                "summary": lambda expression, variations: str(expression)+" is true if one addition is changed to a subtraction or vice versa.",
+                "summary": lambda expression, variations: criterion.children[0].content_string()+" is true if one addition is changed to a subtraction or vice versa.",  # same "<criterion> is true if ..." wording as the other groups
                 "details": lambda expression, variations: "The following expressions are checked: "+", ".join([str(e) for e in variations]),
             },
             "ONE_EXPONENT_FLIP": {
                 "variations": one_exponent_flip(expression_to_vary),
-                "summary": lambda expression, variations: str(expression)+" is true if one exponent has its sign changed.",
+                "summary": lambda expression, variations: criterion.children[0].content_string()+" is true if one exponent has its sign changed.",
+                "details": lambda expression, variations: "The following expressions are checked: "+", ".join([str(e) for e in variations]),
+            },
+            "ONE_SWAP_ADDITION_AND_MULTIPLICATION": {
+                "variations": one_swap_addition_and_multiplication(expression_to_vary),
+                "summary": lambda expression, variations: criterion.children[0].content_string()+" is true if one addition is replaced with a multiplication or vice versa.",
                 "details": lambda expression, variations: "The following expressions are checked: "+", ".join([str(e) for e in variations]),
             }
         }
@@ -292,8 +306,9 @@ def get_candidates(unused_input):
                 graph.attach(
                     label+"_"+group_label,
                     label+"_GET_CANDIDATES_"+group_label,
-                    summary="Get candidate responses that satisfy "+str(expression),
-                    details="Get candidate responses that satisfy "+str(expression), evaluate=get_candidates
+                    summary="Get candidate responses that satisfy "+expression.content_string(),
+                    details="Get candidate responses that satisfy "+expression.content_string(),
+                    evaluate=get_candidates
                 )
 
             for (value, expressions) in values_and_expressions.items():
@@ -303,7 +318,7 @@ def get_candidates(unused_input):
                         graph.attach(
                             label+"_GET_CANDIDATES_"+group_label,
                             "response candidates "+expressions_string,
-                            summary="Response candidates: "+expressions_string,
+                            summary="response = "+str(value),
                             details="Response candidates: "+expressions_string
                         )
                         graph.attach(
@@ -355,6 +370,9 @@ def create_criteria_graphs(criteria, params_dict):
         label = criterion.label.strip()
         graph_template = graph_templates.get(label, criterion_eval_node)
         graph = graph_template(criterion, params_dict)
+        for evaluation in graph.evaluations.values():
+            if evaluation.label in params_dict.get("disabled_evaluation_nodes", set()):
+                evaluation.replacement = CriteriaGraph.END
         criteria_graphs.update({criterion.content_string(): graph})
     return criteria_graphs
 
@@ -503,16 +521,15 @@ def symbolic_comparison(response, answer, params, eval_response) -> dict:
         "reference_criteria_strings": reference_criteria_strings,
         "symbolic_comparison_criteria": symbolic_comparison_criteria,
         "eval_response": eval_response,
+        "disabled_evaluation_nodes": params.get("disabled_evaluation_nodes", set())
     }
     criteria_graphs = create_criteria_graphs(criteria_parsed, parameters_dict)
     criteria_feedback = set()
-    #for criterion in criteria_parsed:
     for (criterion_identifier, graph) in criteria_graphs.items():
         main_criteria = criterion_identifier+"_TRUE"
         criteria_feedback = criteria_feedback.union(graph.generate_feedback(response, main_criteria))
-        #criteria_feedback = criteria_feedback.union(criterion_eval_node(criterion, parameters_dict).generate_feedback(response, main_criteria))
-        #is_correct = is_correct and check_criterion(criterion, parameters_dict)
         is_correct = is_correct and main_criteria in criteria_feedback
+        eval_response.add_criteria_graph(criterion_identifier, graph)
         result = main_criteria in criteria_feedback
         for item in criteria_feedback:
             eval_response.add_feedback((item, ""))
diff --git a/app/symbolic_comparison_evaluation_tests.py b/app/symbolic_comparison_evaluation_tests.py
@@ -1104,7 +1104,7 @@ def test_no_reserved_keywords_in_old_format_input_symbol_alternatives(self):
             ("3", "x+1", "response=answer where x=2", True, ["response=answer where x=2_TRUE"], {}),
             ("1", "x+1", "response=answer where x=2", False, ["response=answer where x=2_ONE_ADDITION_TO_SUBTRACTION", "response candidates x - 1"], {}),
             ("5/3", "x/y+1", "response=answer where x=2; y=3", True, ["response=answer where x=2; y=3_TRUE"], {}),
-            ("15", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_EXPONENT_FLIP"], {}), #NOTE: Sympy reporesents input as (x+y)/y so flipping the exponent gives (x+y)*y instead of x*y+1
+            ("15", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_EXPONENT_FLIP"], {}), #NOTE: Sympy represents input as (x+y)/y so flipping the exponent gives (x+y)*y instead of x*y+1
             ("-1/3", "x/y+1", "response=answer where x=2; y=3", False, ["response=answer where x=2; y=3_ONE_ADDITION_TO_SUBTRACTION"], {}),
             ("13", "x+y*z-1", "response=answer where x=2; y=3; z=4", True, [], {}),
         ]
@@ -1121,6 +1121,27 @@ def test_criteria_based_comparison(self, response, answer, criteria, value, feed
         for feedback_tag in feedback_tags:
             assert feedback_tag in result["tags"]
 
+    @pytest.mark.parametrize(
+        "response, answer, criteria, value, disabled_evaluation_nodes, expected_feedback_tags, disabled_feedback_tags, additional_params",
+        [
+            ("8", "x+y*z**2-1", "response=answer where x=4; y=3; z=2", False, ["response=answer where x=4; y=3; z=2_GET_CANDIDATES_ONE_SWAP_ADDITION_AND_MULTIPLICATION"], ["response=answer where x=4; y=3; z=2_ONE_SWAP_ADDITION_AND_MULTIPLICATION"], ["response candidates -x + y*z**2"], {}),
+        ]
+    )
+    def test_disabled_evaluation_nodes(self, response, answer, criteria, value, disabled_evaluation_nodes, expected_feedback_tags, disabled_feedback_tags, additional_params):
+        params = {
+            "strict_syntax": False,
+            "elementary_functions": True,
+            "criteria": criteria,
+            "disabled_evaluation_nodes": disabled_evaluation_nodes  # evaluation node labels passed through to the evaluation function
+        }
+        params.update(additional_params)  # per-case overrides are applied last
+        result = evaluation_function(response, answer, params, include_test_data=True)
+        assert result["is_correct"] is value
+        for feedback_tag in expected_feedback_tags:  # tags that must still be reported
+            assert feedback_tag in result["tags"]
+        for feedback_tag in disabled_feedback_tags:  # tags that must be absent when the listed nodes are disabled
+            assert feedback_tag not in result["tags"]
+
     @pytest.mark.parametrize(
         "response, answer, value",
         [