Skip to content

Commit

Permalink
Add tool to calculate performance
Browse files Browse the repository at this point in the history
  • Loading branch information
dinhanhx committed Aug 16, 2021
1 parent 0ea11a6 commit 03ad70b
Show file tree
Hide file tree
Showing 4 changed files with 3,033 additions and 0 deletions.
3 changes: 3 additions & 0 deletions py-scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
All files in this folder (`test_seen.jsonl`, `test_unseen.jsonl`, `calc_test.py`) were obtained from [dinhanhx/performance_calculation_tool_for_hm](https://github.com/dinhanhx/performance_calculation_tool_for_hm).

Importantly, `test_seen.jsonl` and `test_unseen.jsonl` **HAVE** labels.
30 changes: 30 additions & 0 deletions py-scripts/calc_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pretty_errors
import click
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score
from pathlib import Path

@click.command()
@click.option('--test_jsonl', type=str, required=True,
              help='Path to test_seen.jsonl or test_unseen.jsonl')
@click.option('--result_csv', type=str, required=True,
              help='Path to result csv of model that is tested on seen or unseen')
def calc_test(test_jsonl, result_csv):
    """Print AUC ROC and accuracy of a model's predictions on a labelled test set.

    The test file is read as JSON Lines and must provide `id` and `label`
    columns. The result file is read as CSV and must provide `id`, `proba`
    and `label` columns. Result rows are aligned to the order of the test
    ids before scoring.
    """
    # Parameters:
    #   test_jsonl: path to the labelled JSON Lines file; its file name selects
    #       the phase shown in the output (1 for test_seen.jsonl, 2 for
    #       test_unseen.jsonl, '_' for any other name).
    #   result_csv: path to the CSV with the model predictions.
    # Raises:
    #   click.ClickException: when the result CSV lacks ids present in the
    #       test file.
    phase_cases = {'test_seen.jsonl': 1, 'test_unseen.jsonl': 2}
    case = phase_cases.get(Path(test_jsonl).name, '_')

    test_df = pd.read_json(test_jsonl, lines=True)
    result_df = pd.read_csv(result_csv)

    # Only realign when the id sequences differ; otherwise rows already match
    # one-to-one by position.
    if test_df['id'].tolist() != result_df['id'].tolist():
        result_df = result_df.set_index('id')

        # reindex() would silently insert NaN rows for ids that have no
        # prediction, so report them explicitly instead of scoring NaN.
        missing_ids = sorted(set(test_df['id']) - set(result_df.index))
        if missing_ids:
            raise click.ClickException(
                f'{len(missing_ids)} id(s) from {test_jsonl} are missing in '
                f'{result_csv}, e.g. {missing_ids[:5]}'
            )

        # Put predictions in the same order as the test ids; result rows whose
        # id is not in the test file are dropped by the reindex.
        result_df = result_df.reindex(index=test_df['id']).reset_index()

    roc_auc = roc_auc_score(test_df['label'], result_df['proba'])
    accuracy = accuracy_score(test_df['label'], result_df['label'])
    print(f'Phase: {case}; AUC ROC: {roc_auc:.4f}; Accuracy: {accuracy:.4f}')

# Invoke the click command only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    calc_test()
Loading

0 comments on commit 03ad70b

Please sign in to comment.