Skip to content
This repository was archived by the owner on Jul 28, 2025. It is now read-only.

Commit 9e5fca1

Browse files
authored
CU-8693az82g Remove cdb tests side effects (#380)
* 8693az82g: Add method to CDBMaker to reset the CDB
* 8693az82g: Add test in CDB tests to ensure a new CDB is used for each test
* 8693az82g: Reset CDB in CDB tests before each test to avoid side effects
1 parent 70305f4 commit 9e5fca1

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

medcat/cdb_maker.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,14 @@ def __init__(self, config: Config, cdb: Optional[CDB] = None) -> None:
4949
name='skip_and_punct',
5050
additional_fields=['is_punct'])
5151

52+
def reset_cdb(self) -> None:
53+
"""This will re-create a new internal CDB based on the same config.
54+
55+
This will be necessary if/when you're wishing to call `prepare_csvs`
56+
multiple times on the same `CDBMaker` instance.
57+
"""
58+
self.cdb = CDB(config=self.config)
59+
5260
def prepare_csvs(self,
5361
csv_paths: Union[pd.DataFrame, List[str]],
5462
sep: str = ',',
@@ -59,6 +67,12 @@ def prepare_csvs(self,
5967
only_existing_cuis: bool = False, **kwargs) -> CDB:
6068
r"""Compile one or multiple CSVs into a CDB.
6169
70+
Note: This class/method generally uses the same instance of the CDB.
71+
So if you're using the same CDBMaker and calling `prepare_csvs`
72+
multiple times, you are likely to get leakage from prior calls
73+
into new ones.
74+
To reset the CDB, call `reset_cdb`.
75+
6276
Args:
6377
csv_paths (Union[pd.DataFrame, List[str]]):
6478
An array of paths to the csv files that should be processed. Can also be an array of pd.DataFrames

tests/test_cdb.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,21 @@ def setUp(self) -> None:
2222
cdb_2_csv = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "examples", "cdb_2.csv")
2323
self.tmp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp")
2424
os.makedirs(self.tmp_dir, exist_ok=True)
25+
# resetting the CDB because otherwise the CDBMaker
26+
# will refer to and modify the same instance of the CDB
27+
# and this can (and does!) create side effects
28+
CDBTests.cdb_maker.reset_cdb()
2529
self.undertest = CDBTests.cdb_maker.prepare_csvs([cdb_csv, cdb_2_csv], full_build=True)
2630

2731
def tearDown(self) -> None:
2832
shutil.rmtree(self.tmp_dir)
2933

34+
def test_setup_changes_cdb(self):
35+
id1 = id(self.undertest)
36+
self.setUp()
37+
id2 = id(self.undertest)
38+
self.assertNotEqual(id1, id2)
39+
3040
def test_name2cuis(self):
3141
self.assertEqual({
3242
'second~csv': ['C0000239'],

0 commit comments

Comments
 (0)