yezhengkai
diff --git a/‎im2latex/tests/support/im2latex_100k/4c0185889d.png
8.81 KB b/‎im2latex/tests/support/im2latex_100k/4c0185889d.png
8.81 KB
diff --git a/‎im2latex/tests/support/im2latex_100k/566cf0c6f5.png
6.23 KB b/‎im2latex/tests/support/im2latex_100k/566cf0c6f5.png
6.23 KB
diff --git a/‎im2latex/tests/support/im2latex_100k/7944775fc9.png
4.32 KB b/‎im2latex/tests/support/im2latex_100k/7944775fc9.png
4.32 KB
diff --git a/‎im2latex/tests/support/im2latex_100k/data_by_file_id.json
+17 b/‎im2latex/tests/support/im2latex_100k/data_by_file_id.json
+17
diff --git a/‎im2latex/tests/test_im2latex_inference.py
+54 b/‎im2latex/tests/test_im2latex_inference.py
+54
@@ -0,0 +1,17 @@
+{
+    "7944775fc9": {
+        "ground_truth_text": "\\alpha _ { 1 } ^ { r } \\gamma _ { 1 } + \\dots + \\alpha _ { N } ^ { r } \\gamma _ { N } = 0 \\quad ( r = 1 , . . . , R ) \\; ,",
+        "predicted_text": "\\alpha _ { 1 } ^ { \\gamma } \\gamma _ { 1 } + . . . + \\alpha _ { N } ^ { \\gamma } \\gamma _ { N } = 0 \\quad ( r = 1 , . . , R ) \\, ,",
+        "character_error_rate": 0.111
+    },
+    "566cf0c6f5": {
+        "ground_truth_text": "\\dot { z } _ { 1 } = - N ^ { z } ( z _ { 1 } ) = - g ( z _ { 1 } ) = - \\frac { z _ { 1 } } { P _ { z } ( z _ { 2 } - z _ { 1 } ) } ; ~ ~ ~ \\dot { z } _ { 2 } = - \\frac { z _ { 2 } } { P _ { z } ( z _ { 2 } - z _ { 1 } ) }",
+        "predicted_text": "\\dot { z } _ { 1 } = - N ^ { z } ( z _ { 1 } ) = - g ( z _ { 1 } ) = - \\frac { z _ { 1 } } { z _ { 2 } ( z _ { 2 } - z _ { 1 } ) } ; \\quad \\dot { z } _ { 2 } = - \\frac { z _ { 2 } } { \\bar { z } _ { z } ( z _ { 2 } - z _ { 1 } ) }",
+        "character_error_rate": 0.074
+    },
+    "4c0185889d": {
+        "ground_truth_text": "{ \\cal L } ( J ) = \\frac { 1 } { 2 } \\partial _ { \\mu } \\phi \\partial ^ { \\mu } \\phi + \\frac { J } { 2 } \\phi ^ { 2 } + \\frac { \\lambda \\mu ^ { 2 \\varepsilon } } { 4 ! } \\phi ^ { 4 } + { \\cal L } _ { \\mathrm { C T } } ( J ) - \\mu ^ { - 2 \\varepsilon } \\frac { \\zeta } { 2 } \\; J ^ { 2 } .",
+        "predicted_text": "{ \\cal L } ( J ) = \\frac { 1 } { 2 } \\partial _ { \\mu } \\phi \\partial ^ { \\mu } \\phi + \\frac { 1 } { 2 } \\phi ^ { 2 } + \\frac { \\lambda \\mu ^ { 2 } } { 4 ! } \\phi ^ { 4 } + { \\cal L } _ { \\mathrm { C T } } ( J ) - \\mu ^ { 2 } \\frac { \\xi } { 2 } \\, J ^ { 2 } .",
+        "character_error_rate": 0.154
+    }
+}
@@ -0,0 +1,54 @@
+"""Test for im2latex_inference module."""
+import json
+import os
+import time
+from pathlib import Path
+
+import editdistance
+
+from im2latex.im2latex_inference import Im2LatexInference
+
+# An empty CUDA_VISIBLE_DEVICES hides every CUDA device from this process.
+# NOTE(review): presumably set so that inference in this test runs on CPU only — confirm.
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
+
+# Directory containing this test module.
+_FILE_DIRNAME = Path(__file__).parents[0].resolve()
+# Directory holding the sample PNG images and data_by_file_id.json.
+_SUPPORT_DIRNAME = _FILE_DIRNAME / "support" / "im2latex_100k"
+
+# Restrict the number of samples to prevent CircleCI from running out of time:
+# 2 when the CIRCLECI environment variable is set to a non-empty value, else 100.
+_NUM_MAX_SAMPLES = 2 if os.environ.get("CIRCLECI", False) else 100
+
+
+def test_im2latex_inference():
+    """Test Im2LatexInference."""
+    support_filenames = list(_SUPPORT_DIRNAME.glob("*.png"))
+    with open(_SUPPORT_DIRNAME / "data_by_file_id.json", "r") as f:
+        support_data_by_file_id = json.load(f)
+
+    start_time = time.time()
+    reasoner = Im2LatexInference()
+    end_time = time.time()
+    print(f"Time taken to initialize Im2LatexInference: {round(end_time - start_time, 2)}s")
+
+    for i, support_filename in enumerate(support_filenames):
+        if i >= _NUM_MAX_SAMPLES:
+            break
+        expected_text = support_data_by_file_id[support_filename.stem]["predicted_text"]
+        start_time = time.time()
+        predicted_text = _test_im2latex_inference(support_filename, expected_text, reasoner)
+        end_time = time.time()
+        time_taken = round(end_time - start_time, 2)
+
+        cer = _character_error_rate(support_data_by_file_id[support_filename.stem]["ground_truth_text"], predicted_text)
+        print(f"Character error rate is {round(cer, 3)} for file {support_filename.name} (time taken: {time_taken}s)")
+
+
+def _test_im2latex_inference(image_filename: Path, expected_text: str, reasoner: Im2LatexInference):
+    """Test Im2LatexInference on 1 image."""
+    predicted_text = reasoner.predict(image_filename)
+    assert predicted_text == expected_text, f"predicted text does not match expected for {image_filename.name}"
+    return predicted_text
+
+
+def _character_error_rate(str_a: str, str_b: str) -> float:
+    """Return character error rate."""
+    return editdistance.eval(str_a, str_b) / max(len(str_a), len(str_b))