CogStack · shubham-s-agarwal · Feb 17, 2025 · Jan 24, 2025 · Feb 17, 2025
diff --git a/medcat/2_train_model/2_supervised_training/meta_annotation_training.ipynb b/medcat/2_train_model/2_supervised_training/meta_annotation_training.ipynb
@@ -114,7 +114,7 @@
    "id": "83701c19",
    "metadata": {},
    "source": [
-    "# For LSTM model"
+    "# For LSTM and BERT models"
    ]
   },
   {
@@ -125,68 +125,23 @@
    "outputs": [],
    "source": [
     "for meta_model in meta_model_names:\n",
-    "    vocab_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,'bbpe-vocab.json')\n",
-    "    merges_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,'bbpe-merges.txt')\n",
-    "    tokenizer = TokenizerWrapperBPE(ByteLevelBPETokenizer(vocab=vocab_file,\n",
-    "                                    merges=merges_file,\n",
-    "                                    lowercase=True))\n",
-    "    # load and sort out the config\n",
-    "    config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n",
-    "    with open(config_file, 'r') as jfile:\n",
-    "        config_dict = json.load(jfile)\n",
-    "    config = ConfigMetaCAT()\n",
-    "    for key, value in config_dict.items():\n",
-    "        setattr(config, key, value['py/state']['__dict__'])\n",
-    "        \n",
+    "    \n",
+    "    # load the saved meta_model from its directory (same call for LSTM and BERT)\n",
+    "    mc = MetaCAT.load(save_dir_path=os.path.join(base_dir_meta_models,\"meta_\"+meta_model))\n",
+    "\n",
+    "    # override training parameters (here: nepochs) before training\n",
+    "    mc.config.train['nepochs'] = 15\n",
+    "\n",
     "    save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
     "    #Ideally this should replace the meta_models inside the modelpack\n",
     "\n",
-    "    # Initialise and train meta_model\n",
-    "    mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n",
+    "    # train the meta_model\n",
     "    results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n",
     "    \n",
     "    # Save results\n",
     "    json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results.json'), 'w'))"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "91ff4e28",
-   "metadata": {},
-   "source": [
-    "# For BERT model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e255dda2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for meta_model in meta_model_names:\n",
-    "    # load and sort out the config\n",
-    "    config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n",
-    "    with open(config_file, 'r') as jfile:\n",
-    "        config_dict = json.load(jfile)\n",
-    "    config = ConfigMetaCAT()\n",
-    "    for key, value in config_dict.items():\n",
-    "        setattr(config, key, value['py/state']['__dict__'])\n",
-    "\n",
-    "    tokenizer = TokenizerWrapperBERT.load(os.path.join(base_dir_meta_models,\"meta_\"+meta_model), \n",
-    "                                          config.model['model_variant'])\n",
-    "    \n",
-    "    # change model name if training BERT for the first time\n",
-    "    config.model['model_name'] = 'bert'\n",
-    "    \n",
-    "    save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
-    "    #Ideally this should replace the meta_models inside the modelpack\n",
-    "\n",
-    "    # Initialise and train meta_model\n",
-    "    mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n",
-    "    results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "ab23e424",