set weights_only parameter of torch.load to False
- #48
aditya0by0 committed Oct 5, 2024
1 parent e17a9c0 commit 7fc96a9
Showing 16 changed files with 68 additions and 29 deletions.
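Context for the change: newer PyTorch releases are migrating the default of torch.load's weights_only argument from False to True. With weights_only=True, loading goes through a restricted unpickler that accepts tensors, primitive containers, and a small allowlist of types, and rejects arbitrary pickled Python objects. The processed .pt files and checkpoints in this repository contain such objects, so every call site now passes weights_only=False explicitly to keep the previous behaviour. A minimal sketch of the difference (the Hyperparams class and file name are hypothetical, for illustration only):

import pickle

import torch


class Hyperparams:  # hypothetical stand-in for non-tensor checkpoint content
    def __init__(self, lr: float):
        self.lr = lr


# A checkpoint that mixes tensors with an arbitrary Python object.
ckpt = {"state_dict": {"w": torch.zeros(2)}, "hparams": Hyperparams(1e-3)}
torch.save(ckpt, "ckpt.pt")

# The restricted unpickler behind weights_only=True rejects the custom class.
try:
    torch.load("ckpt.pt", weights_only=True)
except pickle.UnpicklingError as err:
    print(f"weights_only=True refused the file: {err}")

# weights_only=False restores full pickle semantics, as in this commit.
# Only appropriate for files you produced yourself: unpickling can run code.
restored = torch.load("ckpt.pt", weights_only=False)

Passing weights_only=False is reasonable here because all of these files are generated locally by the package itself.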
8 changes: 6 additions & 2 deletions chebai/models/electra.py
@@ -256,7 +256,9 @@ def __init__(
         # Load pretrained checkpoint if provided
         if pretrained_checkpoint:
             with open(pretrained_checkpoint, "rb") as fin:
-                model_dict = torch.load(fin, map_location=self.device)
+                model_dict = torch.load(
+                    fin, map_location=self.device, weights_only=False
+                )
                 if load_prefix:
                     state_dict = filter_dict(model_dict["state_dict"], load_prefix)
                 else:
@@ -414,7 +416,9 @@ def __init__(self, cone_dimensions=20, **kwargs):
         model_prefix = kwargs.get("load_prefix", None)
         if pretrained_checkpoint:
             with open(pretrained_checkpoint, "rb") as fin:
-                model_dict = torch.load(fin, map_location=self.device)
+                model_dict = torch.load(
+                    fin, map_location=self.device, weights_only=False
+                )
                 if model_prefix:
                     state_dict = {
                         str(k)[len(model_prefix) :]: v
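The load_prefix / model_prefix handling around these hunks strips a namespace prefix from checkpoint keys so that weights saved from a wrapper module can be loaded into the bare model. A rough sketch of the idea (strip_prefix is a hypothetical helper, not the repository's filter_dict):

from typing import Dict

import torch


def strip_prefix(
    state_dict: Dict[str, torch.Tensor], prefix: str
) -> Dict[str, torch.Tensor]:
    # Keep only entries under `prefix` and drop the prefix from their keys.
    return {
        k[len(prefix):]: v for k, v in state_dict.items() if k.startswith(prefix)
    }


wrapped = {"electra.embeddings.weight": torch.zeros(1), "head.bias": torch.zeros(1)}
print(strip_prefix(wrapped, "electra."))  # {'embeddings.weight': tensor([0.])}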
14 changes: 10 additions & 4 deletions chebai/preprocessing/datasets/base.py
@@ -200,7 +200,9 @@ def load_processed_data(
             filename = self.processed_file_names_dict[kind]
         except NotImplementedError:
             filename = f"{kind}.pt"
-        return torch.load(os.path.join(self.processed_dir, filename))
+        return torch.load(
+            os.path.join(self.processed_dir, filename), weights_only=False
+        )

     def dataloader(self, kind: str, **kwargs) -> DataLoader:
         """
@@ -519,7 +521,7 @@ def dataloader(self, kind: str, **kwargs) -> DataLoader:
             DataLoader: DataLoader object for the specified subset.
         """
         subdatasets = [
-            torch.load(os.path.join(s.processed_dir, f"{kind}.pt"))
+            torch.load(os.path.join(s.processed_dir, f"{kind}.pt"), weights_only=False)
             for s in self.subsets
         ]
         dataset = [
@@ -1022,7 +1024,9 @@ def _retrieve_splits_from_csv(self) -> None:
         splits_df = pd.read_csv(self.splits_file_path)

         filename = self.processed_file_names_dict["data"]
-        data = torch.load(os.path.join(self.processed_dir, filename))
+        data = torch.load(
+            os.path.join(self.processed_dir, filename), weights_only=False
+        )
         df_data = pd.DataFrame(data)

         train_ids = splits_df[splits_df["split"] == "train"]["id"]
@@ -1081,7 +1085,9 @@ def load_processed_data(

         # If filename is provided
         try:
-            return torch.load(os.path.join(self.processed_dir, filename))
+            return torch.load(
+                os.path.join(self.processed_dir, filename), weights_only=False
+            )
         except FileNotFoundError:
             raise FileNotFoundError(f"File {filename} doesn't exist")
7 changes: 5 additions & 2 deletions chebai/preprocessing/datasets/chebi.py
@@ -407,7 +407,9 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
         """
         try:
             filename = self.processed_file_names_dict["data"]
-            data_chebi_version = torch.load(os.path.join(self.processed_dir, filename))
+            data_chebi_version = torch.load(
+                os.path.join(self.processed_dir, filename), weights_only=False
+            )
         except FileNotFoundError:
             raise FileNotFoundError(
                 f"File data.pt doesn't exists. "
@@ -428,7 +430,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             data_chebi_train_version = torch.load(
                 os.path.join(
                     self._chebi_version_train_obj.processed_dir, filename_train
-                )
+                ),
+                weights_only=False,
             )
         except FileNotFoundError:
             raise FileNotFoundError(
4 changes: 3 additions & 1 deletion chebai/preprocessing/datasets/go_uniprot.py
@@ -508,7 +508,9 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
         """
         try:
             filename = self.processed_file_names_dict["data"]
-            data_go = torch.load(os.path.join(self.processed_dir, filename))
+            data_go = torch.load(
+                os.path.join(self.processed_dir, filename), weights_only=False
+            )
         except FileNotFoundError:
             raise FileNotFoundError(
                 f"File data.pt doesn't exists. "
4 changes: 2 additions & 2 deletions chebai/preprocessing/datasets/pubchem.py
@@ -891,10 +891,10 @@ def dataloader(self, kind: str, **kwargs) -> DataLoader:
             DataLoader: DataLoader instance.
         """
         labeled_data = torch.load(
-            os.path.join(self.labeled.processed_dir, f"{kind}.pt")
+            os.path.join(self.labeled.processed_dir, f"{kind}.pt"), weights_only=False
         )
         unlabeled_data = torch.load(
-            os.path.join(self.unlabeled.processed_dir, f"{kind}.pt")
+            os.path.join(self.unlabeled.processed_dir, f"{kind}.pt"), weights_only=False
         )
         if self.data_limit is not None:
             labeled_data = labeled_data[: self.data_limit]
2 changes: 1 addition & 1 deletion chebai/preprocessing/migration/chebi_data_migration.py
@@ -168,7 +168,7 @@ def _combine_pt_splits(
         df_list: List[pd.DataFrame] = []
         for split, file_name in old_splits_file_names.items():
             file_path = os.path.join(old_dir, file_name)
-            file_df = pd.DataFrame(torch.load(file_path))
+            file_df = pd.DataFrame(torch.load(file_path, weights_only=False))
             df_list.append(file_df)

         return pd.concat(df_list, ignore_index=True)
4 changes: 3 additions & 1 deletion chebai/result/analyse_sem.py
@@ -427,7 +427,9 @@ def run_all(
             os.path.join(buffer_dir_smoothed, "preds000.pt")
         ):
             preds = torch.load(
-                os.path.join(buffer_dir_smoothed, "preds000.pt"), DEVICE
+                os.path.join(buffer_dir_smoothed, "preds000.pt"),
+                DEVICE,
+                weights_only=False,
             )
             labels = None
         else:
2 changes: 1 addition & 1 deletion chebai/result/base.py
@@ -54,7 +54,7 @@ def _generate_predictions(self, data_path, raw=False, **kwargs):
         else:
             data_tuples = [
                 (x.get("raw_features", x["ident"]), x["ident"], x)
-                for x in torch.load(data_path)
+                for x in torch.load(data_path, weights_only=False)
             ]

         for raw_features, ident, row in tqdm.tqdm(data_tuples):
2 changes: 1 addition & 1 deletion chebai/result/pretraining.py
@@ -34,7 +34,7 @@ def evaluate_model(logs_base_path, model_filename, data_module):
     collate = data_module.reader.COLLATOR()
     test_file = "test.pt"
     data_path = os.path.join(data_module.processed_dir, test_file)
-    data_list = torch.load(data_path)
+    data_list = torch.load(data_path, weights_only=False)
     preds_list = []
     labels_list = []
2 changes: 2 additions & 0 deletions chebai/result/utils.py
@@ -182,6 +182,7 @@ def load_results_from_buffer(
                 torch.load(
                     os.path.join(buffer_dir, filename),
                     map_location=torch.device(device),
+                    weights_only=False,
                 )
             )
             i += 1
@@ -194,6 +195,7 @@ def load_results_from_buffer(
                 torch.load(
                     os.path.join(buffer_dir, filename),
                     map_location=torch.device(device),
+                    weights_only=False,
                 )
             )
             i += 1
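For context, load_results_from_buffer iterates over zero-padded buffer files until one is missing; a condensed sketch of that loop under the same assumptions (illustrative only, not the repository's exact code):

import os
from typing import List, Optional

import torch


def load_buffer_parts(buffer_dir: str, stem: str, device: str) -> Optional[torch.Tensor]:
    # Read stem000.pt, stem001.pt, ... until a file is missing, then
    # concatenate the parts; weights_only=False matches the change above.
    parts: List[torch.Tensor] = []
    i = 0
    while os.path.isfile(path := os.path.join(buffer_dir, f"{stem}{i:03d}.pt")):
        parts.append(
            torch.load(path, map_location=torch.device(device), weights_only=False)
        )
        i += 1
    return torch.cat(parts) if parts else None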
6 changes: 5 additions & 1 deletion tests/testCustomBalancedAccuracyMetric.py
@@ -49,7 +49,9 @@ def test_metric_against_realistic_data(self) -> None:

         # load single file to get the num of labels for metric class instantiation
         labels = torch.load(
-            f"{directory_path}/labels{0:03d}.pt", map_location=torch.device(self.device)
+            f"{directory_path}/labels{0:03d}.pt",
+            map_location=torch.device(self.device),
+            weights_only=False,
         )
         num_labels = labels.shape[1]
         balanced_acc_custom = BalancedAccuracy(num_labels=num_labels)
@@ -58,10 +60,12 @@ def test_metric_against_realistic_data(self) -> None:
             labels = torch.load(
                 f"{directory_path}/labels{i:03d}.pt",
                 map_location=torch.device(self.device),
+                weights_only=False,
             )
             preds = torch.load(
                 f"{directory_path}/preds{i:03d}.pt",
                 map_location=torch.device(self.device),
+                weights_only=False,
             )
             balanced_acc_custom.update(preds, labels)
6 changes: 5 additions & 1 deletion tests/testCustomMacroF1Metric.py
@@ -119,7 +119,9 @@ def test_metric_against_realistic_data(self) -> None:

         # Load single file to get the number of labels for metric class instantiation
         labels = torch.load(
-            f"{directory_path}/labels{0:03d}.pt", map_location=torch.device(self.device)
+            f"{directory_path}/labels{0:03d}.pt",
+            map_location=torch.device(self.device),
+            weights_only=False,
         )
         num_labels = labels.shape[1]
         macro_f1_custom = MacroF1(num_labels=num_labels)
@@ -130,10 +132,12 @@ def test_metric_against_realistic_data(self) -> None:
             labels = torch.load(
                 f"{directory_path}/labels{i:03d}.pt",
                 map_location=torch.device(self.device),
+                weights_only=False,
             )
             preds = torch.load(
                 f"{directory_path}/preds{i:03d}.pt",
                 map_location=torch.device(self.device),
+                weights_only=False,
             )
             macro_f1_standard.update(preds, labels)
             macro_f1_custom.update(preds, labels)
12 changes: 9 additions & 3 deletions tests/testPubChemData.py
@@ -37,9 +37,15 @@ def getDataSplitsOverlaps(cls) -> None:
         processed_path = os.path.join(os.getcwd(), cls.pubChem.processed_dir)
         print(f"Checking Data from - {processed_path}")

-        train_set = torch.load(os.path.join(processed_path, "train.pt"))
-        val_set = torch.load(os.path.join(processed_path, "validation.pt"))
-        test_set = torch.load(os.path.join(processed_path, "test.pt"))
+        train_set = torch.load(
+            os.path.join(processed_path, "train.pt"), weights_only=False
+        )
+        val_set = torch.load(
+            os.path.join(processed_path, "validation.pt"), weights_only=False
+        )
+        test_set = torch.load(
+            os.path.join(processed_path, "test.pt"), weights_only=False
+        )

         train_smiles, train_smiles_ids = cls.get_features_ids(train_set)
         val_smiles, val_smiles_ids = cls.get_features_ids(val_set)
12 changes: 9 additions & 3 deletions tests/testTox21MolNetData.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,15 @@ def getDataSplitsOverlaps(cls) -> None:
processed_path = os.path.join(os.getcwd(), cls.tox21.processed_dir)
print(f"Checking Data from - {processed_path}")

train_set = torch.load(os.path.join(processed_path, "train.pt"))
val_set = torch.load(os.path.join(processed_path, "validation.pt"))
test_set = torch.load(os.path.join(processed_path, "test.pt"))
train_set = torch.load(
os.path.join(processed_path, "train.pt"), weights_only=False
)
val_set = torch.load(
os.path.join(processed_path, "validation.pt"), weights_only=False
)
test_set = torch.load(
os.path.join(processed_path, "test.pt"), weights_only=False
)

train_smiles, train_smiles_ids = cls.get_features_ids(train_set)
val_smiles, val_smiles_ids = cls.get_features_ids(val_set)
Expand Down
10 changes: 5 additions & 5 deletions tutorials/demo_process_results.ipynb
@@ -248,9 +248,9 @@
     "# check if pretraining datasets overlap\n",
     "dm = PubChemDeepSMILES()\n",
     "processed_path = dm.processed_dir\n",
-    "test_set = torch.load(os.path.join(processed_path, \"test.pt\"))\n",
-    "val_set = torch.load(os.path.join(processed_path, \"validation.pt\"))\n",
-    "train_set = torch.load(os.path.join(processed_path, \"train.pt\"))\n",
+    "test_set = torch.load(os.path.join(processed_path, \"test.pt\"), weights_only=False)\n",
+    "val_set = torch.load(os.path.join(processed_path, \"validation.pt\"), weights_only=False)\n",
+    "train_set = torch.load(os.path.join(processed_path, \"train.pt\"), weights_only=False)\n",
     "print(processed_path)\n",
     "test_smiles = [entry[\"features\"] for entry in test_set]\n",
     "val_smiles = [entry[\"features\"] for entry in val_set]\n",
@@ -320,7 +320,7 @@
     "data_module_v200 = ChEBIOver100()\n",
     "data_module_v148 = ChEBIOver100(chebi_version_train=148)\n",
     "data_module_v227 = ChEBIOver100(chebi_version_train=227)\n",
-    "# dataset = torch.load(data_path)\n",
+    "# dataset = torch.load(data_path, weights_only=False)\n",
     "# processors = [CustomResultsProcessor()]\n",
     "# factory = ResultFactory(model, data_module, processors)\n",
     "# factory.execute(data_path)"
@@ -653,7 +653,7 @@
     "    if test_file is None:\n",
     "        test_file = data_module.processed_file_names_dict[\"test\"]\n",
     "    data_path = os.path.join(data_module.processed_dir, test_file)\n",
-    "    data_list = torch.load(data_path)\n",
+    "    data_list = torch.load(data_path, weights_only=False)\n",
     "    preds_list = []\n",
     "    labels_list = []\n",
     "    # if common_classes_mask is not N\n",
2 changes: 1 addition & 1 deletion tutorials/process_results_old_chebi.ipynb
@@ -167,7 +167,7 @@
     "    if test_file is None:\n",
     "        test_file = data_module.processed_file_names_dict[\"test\"]\n",
     "    data_path = os.path.join(data_module.processed_dir, test_file)\n",
-    "    data_list = torch.load(data_path)\n",
+    "    data_list = torch.load(data_path, weights_only=False)\n",
     "    preds_list = []\n",
     "    labels_list = []\n",
     "\n",
