From 76aed0283e6dd5c07b35d099de76783458d9ea44 Mon Sep 17 00:00:00 2001 From: Xi Bai Date: Mon, 5 Aug 2024 17:00:22 +0100 Subject: [PATCH] make already trained models forward compatible --- app/model_services/medcat_model_deid.py | 1 + app/requirements.txt | 1 + app/trainers/medcat_deid_trainer.py | 2 ++ docker/medcat-deid/requirements.txt | 1 + docker/medcat-icd10/requirements.txt | 1 + docker/medcat-snomed/requirements.txt | 1 + docker/medcat-umls/requirements.txt | 1 + docker/trf-deid/requirements.txt | 1 + 8 files changed, 9 insertions(+) diff --git a/app/model_services/medcat_model_deid.py b/app/model_services/medcat_model_deid.py index 6c1aca6..9c80eb3 100644 --- a/app/model_services/medcat_model_deid.py +++ b/app/model_services/medcat_model_deid.py @@ -110,6 +110,7 @@ def init_model(self) -> None: self._model = self.load_model(self._model_pack_path) self._model._addl_ner[0].tokenizer.hf_tokenizer._in_target_context_manager = getattr(self._model._addl_ner[0].tokenizer.hf_tokenizer, "_in_target_context_manager", False) self._model._addl_ner[0].tokenizer.hf_tokenizer.clean_up_tokenization_spaces = getattr(self._model._addl_ner[0].tokenizer.hf_tokenizer, "clean_up_tokenization_spaces", None) + self._model._addl_ner[0].tokenizer.hf_tokenizer.split_special_tokens = getattr(self._model._addl_ner[0].tokenizer.hf_tokenizer, "split_special_tokens", False) if (self._config.DEVICE.startswith("cuda") and torch.cuda.is_available()) or \ (self._config.DEVICE.startswith("mps") and torch.backends.mps.is_available()) or \ (self._config.DEVICE.startswith("cpu")): diff --git a/app/requirements.txt b/app/requirements.txt index ee71355..70973f8 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -1,4 +1,5 @@ medcat~=1.9.0 +blis<1.0.0 fastapi~=0.110.3 uvicorn~=0.29.0 python-multipart~=0.0.5 diff --git a/app/trainers/medcat_deid_trainer.py b/app/trainers/medcat_deid_trainer.py index 39379c1..c60750a 100644 --- a/app/trainers/medcat_deid_trainer.py +++ b/app/trainers/medcat_deid_trainer.py @@ -95,6 +95,7 @@ def run(trainer: "MedcatDeIdentificationSupervisedTrainer", ner = model._addl_ner[0] ner.tokenizer.hf_tokenizer._in_target_context_manager = getattr(ner.tokenizer.hf_tokenizer, "_in_target_context_manager", False) ner.tokenizer.hf_tokenizer.clean_up_tokenization_spaces = getattr(ner.tokenizer.hf_tokenizer, "clean_up_tokenization_spaces", None) + ner.tokenizer.hf_tokenizer.split_special_tokens = getattr(ner.tokenizer.hf_tokenizer, "split_special_tokens", False) _save_pretrained = ner.model.save_pretrained if ("safe_serialization" in inspect.signature(_save_pretrained).parameters): ner.model.save_pretrained = partial(_save_pretrained, safe_serialization=(trainer._config.TRAINING_SAFE_MODEL_SERIALISATION == "true")) @@ -223,6 +224,7 @@ def run(trainer: "MedcatDeIdentificationSupervisedTrainer", ner = trainer._model_service._model._addl_ner[0] ner.tokenizer.hf_tokenizer._in_target_context_manager = getattr(ner.tokenizer.hf_tokenizer, "_in_target_context_manager", False) ner.tokenizer.hf_tokenizer.clean_up_tokenization_spaces = getattr(ner.tokenizer.hf_tokenizer, "clean_up_tokenization_spaces", None) + ner.tokenizer.hf_tokenizer.split_special_tokens = getattr(ner.tokenizer.hf_tokenizer, "split_special_tokens", False) eval_results, examples = ner.eval(data_file.name) cui2names = {} eval_results.sort_values(by=["cui"]) diff --git a/docker/medcat-deid/requirements.txt b/docker/medcat-deid/requirements.txt index 7959c04..84aeb53 100644 --- a/docker/medcat-deid/requirements.txt +++ b/docker/medcat-deid/requirements.txt @@ -1,4 +1,5 @@ medcat~=1.9.0 +blis<1.0.0 fastapi~=0.102.0 uvicorn~=0.29.0 python-multipart~=0.0.5 diff --git a/docker/medcat-icd10/requirements.txt b/docker/medcat-icd10/requirements.txt index 7959c04..84aeb53 100644 --- a/docker/medcat-icd10/requirements.txt +++ b/docker/medcat-icd10/requirements.txt @@ -1,4 +1,5 @@ medcat~=1.9.0 +blis<1.0.0 fastapi~=0.102.0 uvicorn~=0.29.0 python-multipart~=0.0.5 diff --git a/docker/medcat-snomed/requirements.txt b/docker/medcat-snomed/requirements.txt index 7959c04..84aeb53 100644 --- a/docker/medcat-snomed/requirements.txt +++ b/docker/medcat-snomed/requirements.txt @@ -1,4 +1,5 @@ medcat~=1.9.0 +blis<1.0.0 fastapi~=0.102.0 uvicorn~=0.29.0 python-multipart~=0.0.5 diff --git a/docker/medcat-umls/requirements.txt b/docker/medcat-umls/requirements.txt index 7959c04..84aeb53 100644 --- a/docker/medcat-umls/requirements.txt +++ b/docker/medcat-umls/requirements.txt @@ -1,4 +1,5 @@ medcat~=1.9.0 +blis<1.0.0 fastapi~=0.102.0 uvicorn~=0.29.0 python-multipart~=0.0.5 diff --git a/docker/trf-deid/requirements.txt b/docker/trf-deid/requirements.txt index 51a16de..aa354de 100644 --- a/docker/trf-deid/requirements.txt +++ b/docker/trf-deid/requirements.txt @@ -1,4 +1,5 @@ medcat~=1.6.0 +blis<1.0.0 fastapi~=0.102.0 uvicorn~=0.29.0 python-multipart~=0.0.5