Skip to content

Commit 30904a8

Browse files
Merge pull request #154 from lambda-feedback/tr129-experiment-with-syntactical-equivalence
Tr129 experiment with syntactical equivalence
2 parents 8cf12a9 + fc075bd commit 30904a8

File tree

4 files changed

+31
-30
lines changed

4 files changed

+31
-30
lines changed

app/evaluation_response_utilities.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,13 @@ def add_criteria_graph(self, name, graph):
3636
self._criteria_graphs.update({name: graph.json()})
3737

3838
def _serialise_feedback(self) -> str:
39-
return "<br>".join(x[1] if (isinstance(x, tuple) and len(x[1].strip())) > 0 else x for x in self._feedback)
39+
feedback = []
40+
for x in self._feedback:
41+
if (isinstance(x, tuple) and len(x[1].strip())) > 0:
42+
feedback.append(x[1].strip())
43+
elif len(x.strip()) > 0:
44+
feedback.append(x.strip())
45+
return "<br>".join(feedback)
4046

4147
def serialise(self, include_test_data=False) -> dict:
4248
out = dict(is_correct=self.is_correct, feedback=self._serialise_feedback())

app/feedback/symbolic_comparison.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from ..criteria_utilities import Criterion
22

3+
34
# TODO: Find better way of identifying reference criteria
45
# equivalences dictionary should contain a list of variations that are likely to be produced by the following procedure:
56
# - rewrite criteria as expr=0,
@@ -8,18 +9,13 @@
89
equivalences = dict()
910
criteria = dict()
1011

11-
criteria["RESPONSE_EQUAL_ANSWER"] = Criterion("response=answer")
12-
equivalences.update({"RESPONSE_EQUAL_ANSWER": ["response=answer", "answer=response", "answer-response=0", "-answer+response=0", "answer/response=1", "response/answer-1=0"]})
13-
criteria["RESPONSE_EQUAL_ANSWER"][True] = lambda inputs: "The response matches the expected answer."
14-
criteria["RESPONSE_EQUAL_ANSWER"][False] = lambda inputs: "The response does not match the expected answer."
15-
1612
criteria["RESPONSE_DOUBLE_ANSWER"] = Criterion("response=2*answer")
17-
equivalences.update({"RESPONSE_DOUBLE_ANSWER": ["response=2*answer","response/answer=2", "2*answer=response", "answer=response/2", "answer-response/2", "-answer+response/2", "-2*answer+response", "2*answer-response", "-2+answer/response", "-2+response/answer", "answer-1*response/2", "-answer+1*response/2", "-2+1*answer/response", "-2+1*response/answer"]})
13+
equivalences.update({"RESPONSE_DOUBLE_ANSWER": ["response=2*answer", "response/answer=2", "2*answer=response", "answer=response/2", "answer-response/2", "-answer+response/2", "-2*answer+response", "2*answer-response", "-2+answer/response", "-2+response/answer", "answer-1*response/2", "-answer+1*response/2", "-2+1*answer/response", "-2+1*response/answer"]})
1814
criteria["RESPONSE_DOUBLE_ANSWER"][True] = lambda inputs: "The response is the expected answer multiplied by 2."
1915
criteria["RESPONSE_DOUBLE_ANSWER"][False] = lambda inputs: "The response is not the expected answer multiplied by 2."
2016

2117
criteria["RESPONSE_NEGATIVE_ANSWER"] = Criterion("response=-answer")
22-
equivalences.update({"RESPONSE_NEGATIVE_ANSWER": ["response=-answer", "answer=-response", "answer+response=0", "answer+response","answer/response=-1", "response/answer+1"]})
18+
equivalences.update({"RESPONSE_NEGATIVE_ANSWER": ["response=-answer", "answer=-response", "answer+response=0", "answer+response", "answer/response=-1", "response/answer+1"]})
2319
criteria["RESPONSE_NEGATIVE_ANSWER"][True] = lambda inputs: "The response is the expected answer multiplied by -1."
2420
criteria["RESPONSE_NEGATIVE_ANSWER"][False] = lambda inputs: "The response is not the expected answer multiplied by -1."
2521

@@ -65,7 +61,7 @@
6561
"EXPRESSION_NOT_EQUALITY": "The response was an expression but was expected to be an equality.",
6662
"EQUALITY_NOT_EXPRESSION": "The response was an equality but was expected to be an expression.",
6763
"WITHIN_TOLERANCE": None, # "The difference between the response the answer is within specified error tolerance.",
68-
"NOT_NUMERICAL": None, #"The expression cannot be evaluated numerically.",
64+
"NOT_NUMERICAL": None, # "The expression cannot be evaluated numerically.",
6965
}[tag]
7066
feedback_generators["GENERIC"] = lambda tag: lambda inputs: {
7167
"TRUE": None,

app/symbolic_comparison_evaluation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def same_symbols(unused_input):
228228
label+"_SAME_SYMBOLS"+"_TRUE",
229229
summary=str(lhs)+" has the same symbols as "+str(rhs),
230230
details=str(lhs)+" has the same (free) symbols as "+str(rhs)+".",
231-
feedback_string_generator=symbolic_feedback_generators["response=answer"]("FALSE")
231+
feedback_string_generator=symbolic_feedback_generators["SAME_SYMBOLS"]("FALSE")
232232
)
233233
graph.attach(label+"_SAME_SYMBOLS"+"_TRUE", END.label)
234234
graph.attach(

app/symbolic_comparison_evaluation_tests.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,11 +1052,11 @@ def test_no_reserved_keywords_in_old_format_input_symbol_alternatives(self):
10521052
@pytest.mark.parametrize(
10531053
"response, answer, criteria, value, feedback_tags, additional_params",
10541054
[
1055-
("a+b", "b+a", "answer=response", True, ["RESPONSE_EQUAL_ANSWER"], {}),
1055+
("a+b", "b+a", "answer=response", True, ["answer=response_TRUE"], {}),
10561056
#("a+b", "b+a", "not(answer=response)", False, [], {}),
1057-
("a+b", "b+a", "answer-response=0", True, ["RESPONSE_EQUAL_ANSWER"], {}),
1058-
("a+b", "b+a", "answer/response=1", True, ["RESPONSE_EQUAL_ANSWER"], {}),
1059-
("a+b", "b+a", "answer=response, answer-response=0, answer/response=1", True, ["RESPONSE_EQUAL_ANSWER"], {}),
1057+
("a+b", "b+a", "answer-response=0", True, ["answer-response=0_TRUE"], {}),
1058+
("a+b", "b+a", "answer/response=1", True, ["answer/response=1_TRUE"], {}),
1059+
("a+b", "b+a", "answer=response, answer-response=0, answer/response=1", True, ["answer=response_TRUE", "answer-response=0_TRUE", "answer/response=1_TRUE"], {}),
10601060
("2a", "a", "response/answer=2", True, ["RESPONSE_DOUBLE_ANSWER"], {}),
10611061
("2a", "a", "2*answer = response", True, ["RESPONSE_DOUBLE_ANSWER"], {}),
10621062
("2a", "a", "answer = response/2", True, ["RESPONSE_DOUBLE_ANSWER"], {}),
@@ -1145,21 +1145,21 @@ def test_disabled_evaluation_nodes(self, response, answer, criteria, value, disa
11451145
@pytest.mark.parametrize(
11461146
"response, answer, criteria, value, feedback_tags, additional_params",
11471147
[
1148-
("2", "2", "response=answer", True, ["response=answer_SYNTACTICAL_EQUIVALENCE_TRUE", "response=answer_SAME_SYMBOLS_TRUE"], {}),
1149-
("4/2", "2", "answer=response", True, ["answer=response_SYNTACTICAL_EQUIVALENCE_FALSE"], {}),
1150-
("2+x-x", "2", "answer=response", True, ["answer=response_SAME_SYMBOLS_FALSE"], {}),
1151-
("2+2*I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_CARTESIAN"], {}),
1152-
("2+2I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_CARTESIAN"], {}),
1153-
("2.00+2.00*I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_CARTESIAN"], {}),
1148+
("2", "2", "response=answer", True, ["response=answer_TRUE", "response=answer_SYNTACTICAL_EQUIVALENCE_TRUE", "response=answer_SAME_SYMBOLS_TRUE", "response=answer_SAME_FORM_CARTESIAN"], {}),
1149+
("4/2", "2", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1150+
("2+x-x", "2", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_UNKNOWN", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_SYMBOLS_FALSE"], {}),
1151+
("2+2*I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_TRUE", "answer=response_SAME_FORM_CARTESIAN"], {}),
1152+
("2+2I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_CARTESIAN"], {}),
1153+
("2.00+2.00*I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_CARTESIAN"], {}),
11541154
("3+3I", "2+2*I", "answer=response", False, ["answer=response_FALSE", "answer=response_SAME_FORM_CARTESIAN"], {}),
1155-
("2(1+I)", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1156-
("2I+2", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1157-
("4/2+6/3*I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1158-
("2*e^(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1159-
("2*E^(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1160-
("2*exp(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1161-
("2*e**(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1162-
("e**(2*I)", "1*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1155+
("2(1+I)", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1156+
("2I+2", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1157+
("4/2+6/3*I", "2+2*I", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_UNKNOWN"], {}),
1158+
("2*e^(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1159+
("2*E^(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_TRUE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1160+
("2*exp(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1161+
("2*e**(2*I)", "2*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
1162+
("e**(2*I)", "1*e^(2*I)", "answer=response", True, ["answer=response_TRUE", "answer=response_SAME_SYMBOLS_TRUE", "answer=response_SYNTACTICAL_EQUIVALENCE_FALSE", "answer=response_SAME_FORM_EXPONENTIAL"], {}),
11631163
("0.48+0.88*i", "1*e^(0.5*I)", "answer=response", False, ["answer=response_FALSE", "answer=response_SAME_FORM_UNKNOWN"], {}),
11641164
]
11651165
)
@@ -1173,8 +1173,7 @@ def test_syntactical_comparison(self, response, answer, criteria, value, feedbac
11731173
params.update(additional_params)
11741174
result = evaluation_function(response, answer, params, include_test_data=True)
11751175
assert result["is_correct"] is value
1176-
for feedback_tag in feedback_tags:
1177-
assert feedback_tag in result["tags"]
1176+
assert set(feedback_tags) == set(result["tags"])
11781177

11791178
@pytest.mark.parametrize(
11801179
"response, answer, value",

0 commit comments

Comments
 (0)