Merge pull request #220 from jrzaurin/save_opt
Option to save Optimizer in the `save` method
jrzaurin committed Jul 23, 2024
2 parents 8057360 + 9d73a88 commit deb4f2e
Showing 12 changed files with 541 additions and 179 deletions.
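
Before the file-by-file diff, here is what the new option looks like from the user's side. This is a minimal, hypothetical sketch: the toy data, column names and the TabPreprocessor/TabMlp/EncoderDecoderTrainer setup follow the library's usual pretraining workflow and are not part of this diff; only the `save_optimizer` argument is what this PR adds.

import numpy as np
import pandas as pd

from pytorch_widedeep.models import TabMlp
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer

# toy tabular data (hypothetical column names)
df = pd.DataFrame(
    {
        "color": np.random.choice(["r", "g", "b"], 64),
        "price": np.random.rand(64),
    }
)
tab_preprocessor = TabPreprocessor(cat_embed_cols=["color"], continuous_cols=["price"])
X_tab = tab_preprocessor.fit_transform(df)

encoder = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    continuous_cols=["price"],
)

trainer = EncoderDecoderTrainer(encoder=encoder)
trainer.pretrain(X_tab, n_epochs=1, batch_size=16)

# new in this PR: persist the optimizer state together with the model weights
trainer.save(
    path="pretrained_weights",
    save_state_dict=True,
    save_optimizer=True,
    model_filename="ed_model.pt",
)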
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -37,7 +37,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-1.6.1
+1.6.2
pytorch_widedeep/self_supervised_training/_base_contrastive_denoising_trainer.py
@@ -1,7 +1,9 @@
 import os
 import sys
+import json
 import warnings
 from abc import ABC, abstractmethod
+from pathlib import Path

 import numpy as np
 import torch
@@ -31,6 +33,11 @@
 from pytorch_widedeep.preprocessing.tab_preprocessor import TabPreprocessor


+# There is quite a lot of code repetition between the
+# BaseContrastiveDenoisingTrainer and the BaseEncoderDecoderTrainer. Given
+# how differently they are instantiated I am happy to tolerate this
+# repetition. However, if the code base grows, it might be worth refactoring
+# this code
 class BaseContrastiveDenoisingTrainer(ABC):
     def __init__(
         self,
@@ -96,45 +103,82 @@ def pretrain(
     ):
         raise NotImplementedError("Trainer.pretrain method not implemented")

-    @abstractmethod
     def save(
         self,
         path: str,
         save_state_dict: bool,
+        save_optimizer: bool,
         model_filename: str,
     ):
-        raise NotImplementedError("Trainer.save method not implemented")
-
-    def _set_loss_fn(self, **kwargs):
-        if self.loss_type in ["contrastive", "both"]:
-            temperature = kwargs.get("temperature", 0.1)
-            reduction = kwargs.get("reduction", "mean")
-            self.contrastive_loss = InfoNCELoss(temperature, reduction)
-
-        if self.loss_type in ["denoising", "both"]:
-            lambda_cat = kwargs.get("lambda_cat", 1.0)
-            lambda_cont = kwargs.get("lambda_cont", 1.0)
-            reduction = kwargs.get("reduction", "mean")
-            self.denoising_loss = DenoisingLoss(lambda_cat, lambda_cont, reduction)
-
-    def _compute_loss(
-        self,
-        g_projs: Optional[Tuple[Tensor, Tensor]],
-        x_cat_and_cat_: Optional[Tuple[Tensor, Tensor]],
-        x_cont_and_cont_: Optional[Tuple[Tensor, Tensor]],
-    ) -> Tensor:
-        contrastive_loss = (
-            self.contrastive_loss(g_projs)
-            if self.loss_type in ["contrastive", "both"]
-            else torch.tensor(0.0)
-        )
-        denoising_loss = (
-            self.denoising_loss(x_cat_and_cat_, x_cont_and_cont_)
-            if self.loss_type in ["denoising", "both"]
-            else torch.tensor(0.0)
-        )
-
-        return contrastive_loss + denoising_loss
+        r"""Saves the model, training and evaluation history (if any) to disk
+
+        Parameters
+        ----------
+        path: str
+            path to the directory where the model and the feature importance
+            attribute will be saved.
+        save_state_dict: bool, default = False
+            Boolean indicating whether to save directly the model or the
+            model's state dictionary
+        save_optimizer: bool, default = False
+            Boolean indicating whether to save the optimizer or not
+        model_filename: str, Optional, default = "cd_model.pt"
+            filename where the model weights will be stored
+        """
+        self._save_history(path)
+
+        self._save_model_and_optimizer(
+            path, save_state_dict, save_optimizer, model_filename
+        )
+
+    def _save_history(self, path: str):
+        # 'history' here refers to both the training/evaluation history and
+        # the lr history
+        save_dir = Path(path)
+        history_dir = save_dir / "history"
+        history_dir.mkdir(exist_ok=True, parents=True)
+
+        # the trainer is run with the History Callback by default
+        with open(history_dir / "train_eval_history.json", "w") as teh:
+            json.dump(self.history, teh)  # type: ignore[attr-defined]
+
+        has_lr_history = any(
+            [clbk.__class__.__name__ == "LRHistory" for clbk in self.callbacks]
+        )
+        if self.lr_scheduler is not None and has_lr_history:
+            with open(history_dir / "lr_history.json", "w") as lrh:
+                json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]
+
+    def _save_model_and_optimizer(
+        self,
+        path: str,
+        save_state_dict: bool,
+        save_optimizer: bool,
+        model_filename: str,
+    ):
+        model_path = Path(path) / model_filename
+        if save_state_dict and save_optimizer:
+            torch.save(
+                {
+                    "model_state_dict": self.cd_model.state_dict(),
+                    "optimizer_state_dict": self.optimizer.state_dict(),
+                },
+                model_path,
+            )
+        elif save_state_dict and not save_optimizer:
+            torch.save(self.cd_model.state_dict(), model_path)
+        elif not save_state_dict and save_optimizer:
+            torch.save(
+                {
+                    "model": self.cd_model,
+                    "optimizer": self.optimizer,  # this can be a MultipleOptimizer
+                },
+                model_path,
+            )
+        else:
+            torch.save(self.cd_model, model_path)

     def _set_reduce_on_plateau_criterion(
         self, lr_scheduler, reducelronplateau_criterion
@@ -233,6 +277,37 @@ def _set_device_and_num_workers(**kwargs):
         num_workers = kwargs.get("num_workers", default_num_workers)
         return device, num_workers

+    def _set_loss_fn(self, **kwargs):
+        if self.loss_type in ["contrastive", "both"]:
+            temperature = kwargs.get("temperature", 0.1)
+            reduction = kwargs.get("reduction", "mean")
+            self.contrastive_loss = InfoNCELoss(temperature, reduction)
+
+        if self.loss_type in ["denoising", "both"]:
+            lambda_cat = kwargs.get("lambda_cat", 1.0)
+            lambda_cont = kwargs.get("lambda_cont", 1.0)
+            reduction = kwargs.get("reduction", "mean")
+            self.denoising_loss = DenoisingLoss(lambda_cat, lambda_cont, reduction)
+
+    def _compute_loss(
+        self,
+        g_projs: Optional[Tuple[Tensor, Tensor]],
+        x_cat_and_cat_: Optional[Tuple[Tensor, Tensor]],
+        x_cont_and_cont_: Optional[Tuple[Tensor, Tensor]],
+    ) -> Tensor:
+        contrastive_loss = (
+            self.contrastive_loss(g_projs)
+            if self.loss_type in ["contrastive", "both"]
+            else torch.tensor(0.0)
+        )
+        denoising_loss = (
+            self.denoising_loss(x_cat_and_cat_, x_cont_and_cont_)
+            if self.loss_type in ["denoising", "both"]
+            else torch.tensor(0.0)
+        )
+
+        return contrastive_loss + denoising_loss

     @staticmethod
     def _check_model_is_supported(model: ModelWithAttention):
         if model.__class__.__name__ == "TabPerceiver":
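The four `if/elif` branches in `_save_model_and_optimizer` imply four checkpoint layouts, so loading must mirror the flags used when saving. A minimal round-trip sketch for the `save_state_dict=True, save_optimizer=True` case, with a stand-in `nn.Linear` in place of the actual `cd_model` (in practice the model and optimizer must be re-created with the same architecture and optimizer class before calling `load_state_dict`):

import torch
import torch.nn as nn

# stand-ins: these must match the architecture and optimizer class
# that were used during pretraining
model = nn.Linear(4, 2)
optimizer = torch.optim.AdamW(model.parameters())

# same layout as _save_model_and_optimizer writes when both flags are True
torch.save(
    {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    },
    "checkpoint.pt",
)

# later, to resume training
checkpoint = torch.load("checkpoint.pt")
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])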
pytorch_widedeep/self_supervised_training/_base_encoder_decoder_trainer.py
@@ -1,7 +1,9 @@
 import os
 import sys
+import json
 import warnings
 from abc import ABC, abstractmethod
+from pathlib import Path

 import numpy as np
 import torch
@@ -66,22 +68,90 @@ def __init__(
     def pretrain(
         self,
         X_tab: np.ndarray,
-        X_val: Optional[np.ndarray],
+        X_tab_val: Optional[np.ndarray],
         val_split: Optional[float],
         validation_freq: int,
         n_epochs: int,
         batch_size: int,
     ):
         raise NotImplementedError("Trainer.pretrain method not implemented")

-    @abstractmethod
     def save(
         self,
         path: str,
         save_state_dict: bool,
+        save_optimizer: bool,
         model_filename: str,
     ):
-        raise NotImplementedError("Trainer.save method not implemented")
+        r"""Saves the model, training and evaluation history (if any) to disk
+
+        Parameters
+        ----------
+        path: str
+            path to the directory where the model and the feature importance
+            attribute will be saved.
+        save_state_dict: bool, default = False
+            Boolean indicating whether to save directly the model or the
+            model's state dictionary
+        save_optimizer: bool, default = False
+            Boolean indicating whether to save the optimizer or not
+        model_filename: str, Optional, default = "ed_model.pt"
+            filename where the model weights will be stored
+        """
+        self._save_history(path)
+
+        self._save_model_and_optimizer(
+            path, save_state_dict, save_optimizer, model_filename
+        )
+
+    def _save_history(self, path: str):
+        # 'history' here refers to both the training/evaluation history and
+        # the lr history
+        save_dir = Path(path)
+        history_dir = save_dir / "history"
+        history_dir.mkdir(exist_ok=True, parents=True)
+
+        # the trainer is run with the History Callback by default
+        with open(history_dir / "train_eval_history.json", "w") as teh:
+            json.dump(self.history, teh)  # type: ignore[attr-defined]
+
+        has_lr_history = any(
+            [clbk.__class__.__name__ == "LRHistory" for clbk in self.callbacks]
+        )
+        if self.lr_scheduler is not None and has_lr_history:
+            with open(history_dir / "lr_history.json", "w") as lrh:
+                json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]
+
+    def _save_model_and_optimizer(
+        self,
+        path: str,
+        save_state_dict: bool,
+        save_optimizer: bool,
+        model_filename: str,
+    ):
+        model_path = Path(path) / model_filename
+        if save_state_dict and save_optimizer:
+            torch.save(
+                {
+                    "model_state_dict": self.ed_model.state_dict(),
+                    "optimizer_state_dict": self.optimizer.state_dict(),
+                },
+                model_path,
+            )
+        elif save_state_dict and not save_optimizer:
+            torch.save(self.ed_model.state_dict(), model_path)
+        elif not save_state_dict and save_optimizer:
+            torch.save(
+                {
+                    "model": self.ed_model,
+                    "optimizer": self.optimizer,  # this can be a MultipleOptimizer
+                },
+                model_path,
+            )
+        else:
+            torch.save(self.ed_model, model_path)

     def _set_reduce_on_plateau_criterion(
         self, lr_scheduler, reducelronplateau_criterion
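`_save_history` always writes `history/train_eval_history.json` and, only when a scheduler plus the `LRHistory` callback were used, `history/lr_history.json`. Reading them back is plain JSON. A short sketch (the directory name is whatever `path` was passed to `save`, and the key names inside the files are illustrative):

import json
from pathlib import Path

history_dir = Path("pretrained_weights") / "history"

with open(history_dir / "train_eval_history.json") as f:
    train_eval_history = json.load(f)  # e.g. {"train_loss": [...]}

# only written when a scheduler and the LRHistory callback were used
lr_file = history_dir / "lr_history.json"
if lr_file.exists():
    lr_history = json.loads(lr_file.read_text())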
pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
@@ -1,6 +1,3 @@
-import json
-from pathlib import Path
-
 import numpy as np
 import torch
 from tqdm import trange
@@ -259,46 +256,6 @@ def fit(
             X_tab, X_tab_val, val_split, validation_freq, n_epochs, batch_size
         )

-    def save(
-        self,
-        path: str,
-        save_state_dict: bool = False,
-        model_filename: str = "cd_model.pt",
-    ):
-        r"""Saves the model, training and evaluation history (if any) to disk
-
-        Parameters
-        ----------
-        path: str
-            path to the directory where the model and the feature importance
-            attribute will be saved.
-        save_state_dict: bool, default = False
-            Boolean indicating whether to save directly the model or the
-            model's state dictionary
-        model_filename: str, Optional, default = "cd_model.pt"
-            filename where the model weights will be stored
-        """
-        save_dir = Path(path)
-        history_dir = save_dir / "history"
-        history_dir.mkdir(exist_ok=True, parents=True)
-
-        # the trainer is run with the History Callback by default
-        with open(history_dir / "train_eval_history.json", "w") as teh:
-            json.dump(self.history, teh)  # type: ignore[attr-defined]
-
-        has_lr_history = any(
-            [clbk.__class__.__name__ == "LRHistory" for clbk in self.callbacks]
-        )
-        if self.lr_scheduler is not None and has_lr_history:
-            with open(history_dir / "lr_history.json", "w") as lrh:
-                json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]
-
-        model_path = save_dir / model_filename
-        if save_state_dict:
-            torch.save(self.cd_model.state_dict(), model_path)
-        else:
-            torch.save(self.cd_model, model_path)

     def _train_step(self, X_tab: Tensor, batch_idx: int) -> float:
         X = X_tab.to(self.device)
@@ -337,7 +294,7 @@ def _train_eval_split(
             train_set = TensorDataset(torch.from_numpy(X))
             eval_set = TensorDataset(torch.from_numpy(X_tab_val))
         elif val_split is not None:
-            X_tr, X_tab_val = train_test_split(
+            X_tr, X_tab_val = train_test_split(  # type: ignore
                 X, test_size=val_split, random_state=self.seed
            )
             train_set = TensorDataset(torch.from_numpy(X_tr))
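One design note on the `not save_state_dict and save_optimizer` branch of `_save_model_and_optimizer`: it pickles the whole model and optimizer objects (the optimizer possibly being a `MultipleOptimizer`), so loading returns usable objects directly, at the cost of requiring the defining classes to be importable at load time. A hedged loading sketch (the path is hypothetical; `weights_only=False` is needed on recent PyTorch, where `torch.load` defaults to tensors-only unpickling):

import torch

# the checkpoint holds pickled objects under "model" and "optimizer";
# weights_only=False allows full unpickling (only load files you trust)
checkpoint = torch.load("pretrained_weights/cd_model.pt", weights_only=False)
cd_model = checkpoint["model"]
optimizer = checkpoint["optimizer"]  # may be a MultipleOptimizer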