
replace Hparams by init args #1896

Merged
merged 101 commits on May 24, 2020

Changes from 87 commits

Commits (101)
1fced53
remove the need for hparams
williamFalcon May 19, 2020
7fe5f13
remove the need for hparams
williamFalcon May 19, 2020
0283055
remove the need for hparams
williamFalcon May 19, 2020
599c9ad
remove the need for hparams
williamFalcon May 19, 2020
32c7435
replace self.hparams
williamFalcon May 19, 2020
29d3e0a
replace self.hparams
williamFalcon May 19, 2020
f508424
replace self.hparams
williamFalcon May 19, 2020
28b85bd
replace self.hparams
williamFalcon May 19, 2020
355eb7a
replace self.hparams
williamFalcon May 19, 2020
5cc272a
replace self.hparams
williamFalcon May 19, 2020
a5bcd1c
replace self.hparams
williamFalcon May 19, 2020
a4a7407
replace self.hparams
williamFalcon May 19, 2020
8f7e8a2
replace self.hparams
williamFalcon May 19, 2020
b1cd0b5
replace self.hparams
williamFalcon May 19, 2020
e97237e
replace self.hparams
williamFalcon May 19, 2020
a2f6cb5
replace self.hparams
williamFalcon May 19, 2020
9216d28
replace self.hparams
williamFalcon May 19, 2020
7cbc1b2
replace self.hparams
williamFalcon May 19, 2020
137ae13
replace self.hparams
williamFalcon May 19, 2020
b6a9336
replace self.hparams
williamFalcon May 19, 2020
6ea138c
replace self.hparams
williamFalcon May 19, 2020
485ce20
replace self.hparams
williamFalcon May 19, 2020
14dab1b
replace self.hparams
williamFalcon May 19, 2020
268277a
replace self.hparams
williamFalcon May 19, 2020
2111e4b
replace self.hparams
williamFalcon May 19, 2020
07a1c00
replace self.hparams
williamFalcon May 19, 2020
90a1226
replace self.hparams
williamFalcon May 19, 2020
4429d22
replace self.hparams
williamFalcon May 19, 2020
6060a02
replace self.hparams
williamFalcon May 19, 2020
6f856df
replace self.hparams
williamFalcon May 19, 2020
da385fe
replace self.hparams
williamFalcon May 19, 2020
065226d
replace self.hparams
williamFalcon May 19, 2020
f634a8e
replace self.hparams
williamFalcon May 19, 2020
34055b5
replace self.hparams
williamFalcon May 19, 2020
e05c11b
replace self.hparams
williamFalcon May 19, 2020
0937108
replace self.hparams
williamFalcon May 19, 2020
f6587ce
fixed
williamFalcon May 19, 2020
0303695
fixed
williamFalcon May 19, 2020
2b0ceb8
fixed
williamFalcon May 19, 2020
e226c88
fixed
williamFalcon May 19, 2020
ec00520
fixed
williamFalcon May 19, 2020
72793c3
fixed
williamFalcon May 19, 2020
840265d
fixed
williamFalcon May 19, 2020
4bb28fa
fixed
williamFalcon May 19, 2020
91569a8
fixed
williamFalcon May 19, 2020
509036e
fixed
williamFalcon May 19, 2020
a99ffb7
fixed
williamFalcon May 19, 2020
5c3ea20
fixed
williamFalcon May 19, 2020
9d08be3
fixed
williamFalcon May 19, 2020
0452418
fixed
williamFalcon May 19, 2020
0b5557f
finished moco
williamFalcon May 20, 2020
6cd5ea9
basic
williamFalcon May 20, 2020
ed1090c
testing
Borda May 20, 2020
295654e
todo
Borda May 20, 2020
1465a03
recurse
Borda May 20, 2020
91ab93e
hparams
Borda May 20, 2020
0519723
persist
Borda May 20, 2020
a19df1d
hparams
Borda May 20, 2020
1f87263
chlog
Borda May 20, 2020
f35eab0
tests
Borda May 20, 2020
3555e83
tests
Borda May 20, 2020
2a1b2dc
tests
Borda May 20, 2020
3c79ae3
tests
Borda May 21, 2020
5767188
tests
Borda May 21, 2020
cbb00b5
tests
Borda May 21, 2020
acc020f
review
Borda May 21, 2020
b3b6236
saving
Borda May 21, 2020
b97e0b1
tests
Borda May 22, 2020
5a4740a
tests
Borda May 22, 2020
2a6be20
tests
Borda May 22, 2020
e80b006
docs
Borda May 22, 2020
e50b78f
finished moco
williamFalcon May 22, 2020
fd7be0d
hparams
Borda May 22, 2020
c319528
review
Borda May 22, 2020
b313477
Apply suggestions from code review
Borda May 22, 2020
488d18a
hparams
Borda May 22, 2020
d24b78e
overwrite
Borda May 22, 2020
0d7ee37
transform
Borda May 22, 2020
fb7898a
transform
Borda May 22, 2020
3d8a3db
transform
Borda May 22, 2020
db6f943
transform
Borda May 23, 2020
66717da
cleaning
Borda May 23, 2020
088c3bd
cleaning
Borda May 23, 2020
dfd3a26
tests
Borda May 23, 2020
72f4cd0
examples
Borda May 23, 2020
2a8872b
examples
Borda May 23, 2020
5fe6f02
examples
Borda May 23, 2020
bad8d11
Apply suggestions from code review
Borda May 24, 2020
55b58f7
chp key
Borda May 24, 2020
5383014
tests
Borda May 24, 2020
1fd8cce
Apply suggestions from code review
Borda May 24, 2020
ab3be59
class
Borda May 24, 2020
10ca1a8
Merge branch 'no_hparams' of https://github.com/PyTorchLightning/pyto…
Borda May 24, 2020
8f57274
updated docs
williamFalcon May 24, 2020
20ad2ca
updated docs
williamFalcon May 24, 2020
f683160
updated docs
williamFalcon May 24, 2020
db5d1bf
updated docs
williamFalcon May 24, 2020
6af55e7
save
Borda May 24, 2020
04e8e67
wip
Borda May 24, 2020
a432f6e
fix
Borda May 24, 2020
2892e5a
flake8
Borda May 24, 2020
3 changes: 2 additions & 1 deletion .gitignore
@@ -133,4 +133,5 @@ mnist/
# pl tests
ml-runs/
*.zip
pytorch\ lightning
pytorch\ lightning
test-reports/
3 changes: 3 additions & 0 deletions .run_local_tests.sh
@@ -14,3 +14,6 @@ rm -rf ./tests/tests/*
rm -rf ./lightning_logs
python -m coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules --flake8
python -m coverage report -m

# specific file
# python -m coverage run --source pytorch_lightning -m py.test -k test_trainer.py --flake8
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Removed non-finite values from loss in `LRFinder` ([#1862](https://github.com/PyTorchLightning/pytorch-lightning/pull/1862))

- Allow passing model hyperparameters as a complete kwarg list ([#1896](https://github.com/PyTorchLightning/pytorch-lightning/pull/1896))

### Deprecated

- Dropped official support/testing for older PyTorch versions <1.3 ([#1917](https://github.com/PyTorchLightning/pytorch-lightning/pull/1917))
52 changes: 37 additions & 15 deletions docs/source/hyperparameters.rst
@@ -105,21 +105,22 @@ modify the network and read those values in the LightningModule

class LitMNIST(LightningModule):

def __init__(self, hparams):
def __init__(self, layer_1_dim, layer_2_dim, learning_rate, batch_size):
super().__init__()
self.layer_1_dim = layer_1_dim
self.layer_2_dim = layer_2_dim
self.learning_rate = learning_rate
self.batch_size = batch_size

# do this to save all arguments in any logger (tensorboard)
self.hparams = hparams

self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim)
self.layer_2 = torch.nn.Linear(hparams.layer_1_dim, hparams.layer_2_dim)
self.layer_3 = torch.nn.Linear(hparams.layer_2_dim, 10)
self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim)
self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.layer_2_dim)
self.layer_3 = torch.nn.Linear(self.layer_2_dim, 10)

def train_dataloader(self):
return DataLoader(mnist_train, batch_size=self.hparams.batch_size)
return DataLoader(mnist_train, batch_size=self.batch_size)

def configure_optimizers(self):
return Adam(self.parameters(), lr=self.hparams.learning_rate)
return Adam(self.parameters(), lr=self.learning_rate)

@staticmethod
def add_model_specific_args(parent_parser):
@@ -136,14 +137,35 @@ Now pass in the params when you init your model

parser = ArgumentParser()
parser = LitMNIST.add_model_specific_args(parser)
hparams = parser.parse_args()
model = LitMNIST(hparams)
args = parser.parse_args()
model = LitMNIST(**vars(args))

Within any LightningModule, all the arguments you pass into your `__init__` will be available
via `self._module_arguments`. However, we won't overwrite any attributes you have already defined yourself.
We will also add all of those values to the TensorBoard hparams tab (unless a value is an object,
in which case it is skipped). We will also store those values in the checkpoint for you, and you
can use them to init your models.

.. code-block:: python

class LitMNIST(LightningModule):

def __init__(self, layer_1_dim, some_other_param):
super().__init__()
self.layer_1_dim = layer_1_dim
self.some_other_param = some_other_param

self.layer_1 = torch.nn.Linear(28 * 28, self.layer_1_dim)

# self.some_other_param is automatically available
self.layer_2 = torch.nn.Linear(self.layer_1_dim, self.some_other_param)
self.layer_3 = torch.nn.Linear(self.some_other_param, 10)

self.some_other_param = 12
# but you can override it as normal

model = LitMNIST(10, 20)

The line `self.hparams = hparams` is very special. This line assigns your hparams to the LightningModule.
This does two things:

1. It adds them automatically to TensorBoard logs under the hparams tab.
2. Lightning will save those hparams to the checkpoint and use them to restore the module correctly.
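
Since these values are also stored in the checkpoint, restoring the module re-uses them. A minimal
sketch, assuming a checkpoint produced by training the ``LitMNIST`` above (the path is a placeholder):

.. code-block:: python

    # the init args saved in the checkpoint are passed back into __init__ for you
    model = LitMNIST.load_from_checkpoint('path/to/checkpoint.ckpt')
    print(model.layer_1_dim)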

Trainer args
^^^^^^^^^^^^
13 changes: 6 additions & 7 deletions docs/source/lr_finder.rst
@@ -36,18 +36,17 @@ hyperparameters of the model.
# default: no automatic learning rate finder
trainer = Trainer(auto_lr_find=False)

When the ``lr`` or ``learning_rate`` key in hparams exists, this flag sets your learning_rate.
In both cases, if the respective fields are not found, an error will be thrown.

This flag sets your learning rate, which can then be accessed via ``self.lr`` or ``self.learning_rate``.

.. testcode::

class LitModel(LightningModule):

def __init__(self, hparams):
self.hparams = hparams
def __init__(self, learning_rate):
    super().__init__()
    self.learning_rate = learning_rate

def configure_optimizers(self):
return Adam(self.parameters(), lr=self.hparams.lr|self.hparams.learning_rate)
return Adam(self.parameters(), lr=self.learning_rate)

# finds learning rate automatically
# sets hparams.lr or hparams.learning_rate to that learning rate
Contributor Author

@SkafteNicki once we merge this we need to update setting LR.
I think it should just set it in the model at self.lr or self.learning_rate

@@ -97,7 +96,7 @@ of this would look like

# update hparams of the model
model.hparams.lr = new_lr

# Fit model
trainer.fit(model)
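
For reference, a minimal sketch of the manual flow that the collapsed lines above belong to
(this assumes the ``trainer.lr_find``/``lr_finder.suggestion()`` API described elsewhere in this
document; with this PR the model stores ``self.learning_rate`` directly, so the sketch writes the
suggestion there instead of to ``hparams``):

.. code-block:: python

    model = LitModel(learning_rate=1e-3)
    trainer = Trainer()

    # run the learning-rate finder (API assumed from the surrounding docs)
    lr_finder = trainer.lr_find(model)

    # pick the suggested learning rate and write it back onto the model
    model.learning_rate = lr_finder.suggestion()

    # fit with the tuned learning rate
    trainer.fit(model)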

2 changes: 1 addition & 1 deletion docs/source/training_tricks.rst
@@ -67,7 +67,7 @@ a binary search.
.. code-block:: python

def train_dataloader(self):
return DataLoader(train_dataset, batch_size=self.hparams.batch_size)
return DataLoader(train_dataset, batch_size=self.batch_size)
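
A minimal sketch of turning the feature on, assuming a module whose ``__init__`` stores
``batch_size`` as above (the ``'binsearch'`` mode name is an assumption based on the
binary-search behaviour mentioned in this section):

.. code-block:: python

    # 'binsearch' is assumed from the binary-search behaviour described above
    trainer = Trainer(auto_scale_batch_size='binsearch')

    # the largest batch size that fits is written back onto the model (self.batch_size with this PR)
    trainer.fit(model)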

.. warning::

55 changes: 28 additions & 27 deletions docs/source/weights_loading.rst
@@ -59,24 +59,20 @@ Or disable it by passing
trainer = Trainer(checkpoint_callback=False)


The Lightning checkpoint also saves the hparams (hyperparams) passed into the LightningModule init.
The Lightning checkpoint also saves the arguments passed into the LightningModule init
under the `module_arguments` key in the checkpoint.

.. note:: hparams is a `Namespace <https://docs.python.org/2/library/argparse.html#argparse.Namespace>`_.

.. testcode::

    from argparse import Namespace

    # usually these come from command line args
    args = Namespace(learning_rate=0.001)

    # define your module to have hparams as the first arg
    # this means your checkpoint will have everything that went into making
    # this model (in this case, learning rate)
    class MyLightningModule(LightningModule):

        def __init__(self, hparams, *args, **kwargs):
            self.hparams = hparams

.. code-block:: python

    class MyLightningModule(LightningModule):

        def __init__(self, learning_rate, *args, **kwargs):
            super().__init__()

    # all init args were saved to the checkpoint
    checkpoint = torch.load(CKPT_PATH)
    print(checkpoint['module_arguments'])
    # {'learning_rate': the_value}

Manual saving
^^^^^^^^^^^^^
@@ -92,37 +88,42 @@ You can manually save checkpoints and restore your model from the checkpointed s
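
A minimal sketch of manual saving and restoring, assuming the ``Trainer.save_checkpoint`` API the
collapsed section refers to (the file name is a placeholder):

.. code-block:: python

    model = MyLightningModule(learning_rate=0.001)

    trainer = Trainer()
    trainer.fit(model)

    # write a checkpoint wherever you like
    trainer.save_checkpoint("example.ckpt")

    # restore later; the init args stored under `module_arguments` are passed back into __init__
    new_model = MyLightningModule.load_from_checkpoint(checkpoint_path="example.ckpt")
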
Checkpoint Loading
------------------

To load a model along with its weights, biases and hyperparameters use following method.
To load a model along with its weights, biases, and `module_arguments`, use the following method.

.. code-block:: python

model = MyLightingModule.load_from_checkpoint(PATH)
print(model.learning_rate)
# prints the learning_rate you used in this checkpoint

model.eval()
y_hat = model(x)

The above only works if you used `hparams` in your model definition

.. testcode::

    class LitModel(LightningModule):

        def __init__(self, hparams):
            self.hparams = hparams
            self.l1 = nn.Linear(hparams.in_dim, hparams.out_dim)

But if you don't and instead pass individual parameters

But if you don't want to use the values saved in the checkpoint, pass in your own here

.. testcode::

class LitModel(LightningModule):

def __init__(self, in_dim, out_dim):
self.l1 = nn.Linear(in_dim, out_dim)
super().__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.l1 = nn.Linear(self.in_dim, self.out_dim)

you can restore the model like this

.. code-block:: python

# if you train and save the model like this it will use these values when loading
# the weights. But you can overwrite this
LitModel(in_dim=32, out_dim=10)

# uses in_dim=32, out_dim=10
model = LitModel.load_from_checkpoint(PATH)

# uses in_dim=128, out_dim=10
model = LitModel.load_from_checkpoint(PATH, in_dim=128, out_dim=10)


68 changes: 41 additions & 27 deletions pl_examples/domain_templates/computer_vision_fine_tuning.py
@@ -148,23 +148,37 @@ class TransferLearningModel(pl.LightningModule):
dl_path: Path where the data will be downloaded
"""
def __init__(self,
hparams: argparse.Namespace,
dl_path: Union[str, Path]) -> None:
dl_path: Union[str, Path],
backbone: str = 'resnet50',
train_bn: bool = True,
milestones: tuple = (5, 10),
batch_size: int = 8,
lr: float = 1e-2,
lr_scheduler_gamma: float = 1e-1,
num_workers: int = 6) -> None:
super().__init__()
self.hparams = hparams
self.dl_path = dl_path
self.backbone = backbone
self.train_bn = train_bn
self.milestones = milestones
self.batch_size = batch_size
self.lr = lr
self.lr_scheduler_gamma = lr_scheduler_gamma
self.num_workers = num_workers

self.dl_path = dl_path
self.__build_model()

def __build_model(self):
"""Define model layers & loss."""

# 1. Load pre-trained network:
model_func = getattr(models, self.hparams.backbone)
model_func = getattr(models, self.backbone)
backbone = model_func(pretrained=True)

_layers = list(backbone.children())[:-1]
self.feature_extractor = torch.nn.Sequential(*_layers)
freeze(module=self.feature_extractor, train_bn=self.hparams.train_bn)
freeze(module=self.feature_extractor, train_bn=self.train_bn)

# 2. Classifier:
_fc_layers = [torch.nn.Linear(2048, 256),
@@ -194,29 +208,29 @@ def train(self, mode=True):
super().train(mode=mode)

epoch = self.current_epoch
if epoch < self.hparams.milestones[0] and mode:
if epoch < self.milestones[0] and mode:
# feature extractor is frozen (except for BatchNorm layers)
freeze(module=self.feature_extractor,
train_bn=self.hparams.train_bn)
train_bn=self.train_bn)

elif self.hparams.milestones[0] <= epoch < self.hparams.milestones[1] and mode:
elif self.milestones[0] <= epoch < self.milestones[1] and mode:
# Unfreeze last two layers of the feature extractor
freeze(module=self.feature_extractor,
n=-2,
train_bn=self.hparams.train_bn)
train_bn=self.train_bn)

def on_epoch_start(self):
"""Use `on_epoch_start` to unfreeze layers progressively."""
optimizer = self.trainer.optimizers[0]
if self.current_epoch == self.hparams.milestones[0]:
if self.current_epoch == self.milestones[0]:
_unfreeze_and_add_param_group(module=self.feature_extractor[-2:],
optimizer=optimizer,
train_bn=self.hparams.train_bn)
train_bn=self.train_bn)

elif self.current_epoch == self.hparams.milestones[1]:
elif self.current_epoch == self.milestones[1]:
_unfreeze_and_add_param_group(module=self.feature_extractor[:-2],
optimizer=optimizer,
train_bn=self.hparams.train_bn)
train_bn=self.train_bn)

def training_step(self, batch, batch_idx):

@@ -246,7 +260,7 @@ def training_epoch_end(self, outputs):
for output in outputs]).mean()
train_acc_mean = torch.stack([output['num_correct']
for output in outputs]).sum().float()
train_acc_mean /= (len(outputs) * self.hparams.batch_size)
train_acc_mean /= (len(outputs) * self.batch_size)
return {'log': {'train_loss': train_loss_mean,
'train_acc': train_acc_mean,
'step': self.current_epoch}}
@@ -273,19 +287,19 @@ def validation_epoch_end(self, outputs):
for output in outputs]).mean()
val_acc_mean = torch.stack([output['num_correct']
for output in outputs]).sum().float()
val_acc_mean /= (len(outputs) * self.hparams.batch_size)
val_acc_mean /= (len(outputs) * self.batch_size)
return {'log': {'val_loss': val_loss_mean,
'val_acc': val_acc_mean,
'step': self.current_epoch}}

def configure_optimizers(self):
optimizer = optim.Adam(filter(lambda p: p.requires_grad,
self.parameters()),
lr=self.hparams.lr)
lr=self.lr)

scheduler = MultiStepLR(optimizer,
milestones=self.hparams.milestones,
gamma=self.hparams.lr_scheduler_gamma)
milestones=self.milestones,
gamma=self.lr_scheduler_gamma)

return [optimizer], [scheduler]

@@ -326,8 +340,8 @@ def __dataloader(self, train):

_dataset = self.train_dataset if train else self.valid_dataset
loader = DataLoader(dataset=_dataset,
batch_size=self.hparams.batch_size,
num_workers=self.hparams.num_workers,
batch_size=self.batch_size,
num_workers=self.num_workers,
shuffle=True if train else False)

return loader
@@ -397,28 +411,28 @@ def add_model_specific_args(parent_parser):
return parser


def main(hparams: argparse.Namespace) -> None:
def main(args: argparse.Namespace) -> None:
"""Train the model.

Args:
hparams: Model hyper-parameters
args: Model hyper-parameters

Note:
For the sake of the example, the images dataset will be downloaded
to a temporary directory.
"""

with TemporaryDirectory(dir=hparams.root_data_path) as tmp_dir:
with TemporaryDirectory(dir=args.root_data_path) as tmp_dir:

model = TransferLearningModel(hparams, dl_path=tmp_dir)
model = TransferLearningModel(dl_path=tmp_dir, **vars(args))

trainer = pl.Trainer(
weights_summary=None,
show_progress_bar=True,
num_sanity_val_steps=0,
gpus=hparams.gpus,
min_epochs=hparams.nb_epochs,
max_epochs=hparams.nb_epochs)
gpus=args.gpus,
min_epochs=args.nb_epochs,
max_epochs=args.nb_epochs)

trainer.fit(model)
