Add tool to calculate performance

dinhanhx · Aug 16, 2021 · 03ad70b · 03ad70b
1 parent 0ea11a6
commit 03ad70b
Show file tree

Hide file tree

Showing 4 changed files with 3,033 additions and 0 deletions.
diff --git a/py-scripts/README.md b/py-scripts/README.md
@@ -0,0 +1,3 @@
+All files in this folder (`test_seen.jsonl`, `test_unseen.jsonl`, `calc_test.py`) were obtained from [dinhanhx/performance_calculation_tool_for_hm](https://github.com/dinhanhx/performance_calculation_tool_for_hm).
+
+Importantly, both `test_seen.jsonl` and `test_unseen.jsonl` **HAVE** labels.
diff --git a/py-scripts/calc_test.py b/py-scripts/calc_test.py
@@ -0,0 +1,30 @@
+import pretty_errors
+import click
+import pandas as pd
+from sklearn.metrics import roc_auc_score, accuracy_score
+from pathlib import Path
+
+@click.command()
+@click.option('--test_jsonl', type=str, required=True,
+              help='Path to test_seen.jsonl or test_unseen.jsonl')
+@click.option('--result_csv', type=str, required=True,
+              help='Path to result csv of model that is tested on seen or unseen')
+def calc_test(test_jsonl, result_csv):
+    """Print AUC ROC and accuracy of model predictions against a labelled test set.
+
+    The test jsonl is read for its `id` and `label` fields; the result csv is
+    read for its `id`, `proba` and `label` columns. Rows are matched on `id`.
+    """
+    # The reported phase is derived from the file name only; any name other
+    # than the two known ones is reported as '_'.
+    phase_cases = {'test_seen.jsonl': 1, 'test_unseen.jsonl': 2}
+    case = phase_cases.get(Path(test_jsonl).name, '_')
+
+    test_df = pd.read_json(test_jsonl, lines=True)
+    result_df = pd.read_csv(result_csv)
+
+    # Predictions may be stored in a different order than the test set, so
+    # line them up on `id` before comparing row by row.
+    if test_df['id'].tolist() != result_df['id'].tolist():
+        # reindex would silently fill absent ids with NaN and the metric
+        # functions would then fail with an unrelated-looking error, so
+        # report the real problem up front.
+        missing_ids = test_df.loc[~test_df['id'].isin(result_df['id']), 'id']
+        if not missing_ids.empty:
+            raise click.ClickException(
+                f'{len(missing_ids)} id(s) from {test_jsonl} are missing in '
+                f'{result_csv} (first missing id: {missing_ids.iloc[0]})'
+            )
+
+        result_df = result_df.set_index('id')
+        result_df = result_df.reindex(index=test_df['id'])
+        result_df = result_df.reset_index()
+
+    roc_auc = roc_auc_score(test_df['label'], result_df['proba'])
+    accuracy = accuracy_score(test_df['label'], result_df['label'])
+    print(f'Phase: {case}; AUC ROC: {roc_auc:.4f}; Accuracy: {accuracy:.4f}')
+
+# Run the click command only when this file is executed as a script.
+if __name__ == '__main__':
+    calc_test()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		All files in this folder (`test_seen.jsonl`, `test_unseen.jsonl`, `calc_test.py`) were obtained from [dinhanhx/performance_calculation_tool_for_hm](https://github.com/dinhanhx/performance_calculation_tool_for_hm).

		Importantly, both `test_seen.jsonl` and `test_unseen.jsonl` HAVE labels.