Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CU-8698gkrqa: Add argument to allow specifying the changes warranting a model save #525

Merged
merged 1 commit into from
Mar 28, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions medcat/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,12 @@ def get_model_card(self, as_dict: bool = False):
else:
return json.dumps(card, indent=2, sort_keys=False)

def _versioning(self, force_rehash: bool = False):
def _versioning(self, force_rehash: bool = False,
change_description: Optional[str] = None):
# Check version info and do not allow without it
date_today = date.today().strftime("%d %B %Y")
if change_description is not None:
self.config.version.description += f"\n[{date_today}] {change_description}"
if self.config.version.description == 'No description':
logger.warning("Please consider populating the version information [description, performance, location, ontology] in cat.config.version")

Expand All @@ -222,14 +226,17 @@ def _versioning(self, force_rehash: bool = False):
if version.id is not None:
version.history.append(version['id'])
version.id = m
version.last_modified = date.today().strftime("%d %B %Y")
version.last_modified = date_today
version.cdb_info = self.cdb.make_stats()
version.meta_cats = [meta_cat.get_model_card(as_dict=True) for meta_cat in self._meta_cats]
version.medcat_version = __version__
logger.warning("Please consider updating [description, performance, location, ontology] in cat.config.version")

def create_model_pack(self, save_dir_path: str, model_pack_name: str = DEFAULT_MODEL_PACK_NAME, force_rehash: bool = False,
cdb_format: str = 'dill') -> str:
def create_model_pack(self, save_dir_path: str,
model_pack_name: str = DEFAULT_MODEL_PACK_NAME,
force_rehash: bool = False,
change_description: Optional[str] = None,
cdb_format: str = 'dill') -> str:
"""Will crete a .zip file containing all the models in the current running instance
of MedCAT. This is not the most efficient way, for sure, but good enough for now.

Expand All @@ -240,6 +247,8 @@ def create_model_pack(self, save_dir_path: str, model_pack_name: str = DEFAULT_M
The model pack name. Defaults to DEFAULT_MODEL_PACK_NAME.
force_rehash (bool):
Force recalculation of hash. Defaults to `False`.
change_description (Optional[str]):
The description of the change due to which a save is required. Defaults to None.
cdb_format (str):
The format of the saved CDB in the model pack.
The available formats are:
Expand All @@ -254,7 +263,7 @@ def create_model_pack(self, save_dir_path: str, model_pack_name: str = DEFAULT_M
# Spacy model always should be just the name, but during loading it can be reset to path
self.config.general.spacy_model = os.path.basename(self.config.general.spacy_model)
# Versioning
self._versioning(force_rehash)
self._versioning(force_rehash, change_description)
model_pack_name += "_{}".format(self.config.version.id)

logger.warning("This will save all models into a zip file, can take some time and require quite a bit of disk space.")
Expand Down