Remove default optimizer, add None optimizer option #1279

Merged · 7 commits · Apr 2, 2020
Changes from 4 commits
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -16,10 +16,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added support for non-primitive types in `hparams` for `TensorboardLogger` ([#1130](https://github.com/PyTorchLightning/pytorch-lightning/pull/1130))
- Added a check that stops the training when loss or weights contain `NaN` or `inf` values. ([#1097](https://github.com/PyTorchLightning/pytorch-lightning/pull/1097))
- Updated references to self.forward() to instead use the `__call__` interface. ([#1211](https://github.com/PyTorchLightning/pytorch-lightning/pull/1211))
- Added option to run without an optimizer by returning `None` from `configure_optimizers`. ([#1279](https://github.com/PyTorchLightning/pytorch-lightning/pull/1279))

### Changed

-
- Changed default behaviour of `configure_optimizers` to use no optimizer rather than Adam. ([#1279](https://github.com/PyTorchLightning/pytorch-lightning/pull/1279))

### Deprecated

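To illustrate the two entries above, a minimal sketch (not part of the diff; the class name is invented and training_step / dataloaders are omitted):

import pytorch_lightning as pl


class NoOptimizerModel(pl.LightningModule):  # hypothetical module
    def configure_optimizers(self):
        # Opt out of optimization entirely: the Trainer warns and substitutes
        # an internal no-op optimizer instead of silently creating Adam(lr=1e-3).
        return None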
13 changes: 6 additions & 7 deletions pytorch_lightning/core/lightning.py
@@ -10,7 +10,6 @@
import torch.distributed as torch_distrib
from torch import Tensor
from torch.nn.parallel import DistributedDataParallel
from torch.optim import Adam
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader

@@ -905,24 +904,25 @@ def configure_apex(self, amp, model, optimizers, amp_level):

return model, optimizers

def configure_optimizers(self) -> Union[
def configure_optimizers(self) -> Optional[Union[
Contributor

What if we just make the whole function optional, instead of just the args? Or is that Optional syntax doing that?

Contributor

Or, I guess, if we make the method still required but with an optional return, then it maintains readability? On second thought I kind of like this better; that way it's explicit in the code that you used no optimizer.

Member Author

Yeah, this does that. At least, PyCharm isn't prompting me to implement the method :) The Optional annotation just means you won't get a type warning if you return None.

Member Author

^^ We can do that. Currently the method isn't required, but happy to change that :) Agree that this behaviour should be explicit.

Optimizer, List[Optimizer], Tuple[Optimizer, ...], Tuple[List[Optimizer], List]
]:
]]:
r"""
Choose what optimizers and learning-rate schedulers to use in your optimization.
Normally you'd need one. But in the case of GANs or similar you might have multiple.

If you don't define this method Lightning will automatically use Adam(lr=1e-3)
If you don't define this method Lightning will run **without any optimizer**.

Return: any of these 3 options:
Return: any of these 4 options:
- Single optimizer
- List or Tuple - List of optimizers
- Two lists - The first list has multiple optimizers, the second a list of LR schedulers
- None - Fit will run without any optimizer

Examples:
.. code-block:: python

# most cases (default if not defined)
# most cases
def configure_optimizers(self):
opt = Adam(self.parameters(), lr=1e-3)
return opt
@@ -983,7 +983,6 @@ def configure_optimizers(self):
}

"""
return Adam(self.parameters(), lr=1e-3)

def optimizer_step(
self,
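Since the implicit `return Adam(self.parameters(), lr=1e-3)` is removed above, here is a sketch of how a project keeps the old behaviour by defining the hook explicitly (the class name is illustrative, other hooks are omitted):

import pytorch_lightning as pl
from torch.optim import Adam


class KeepsOldDefault(pl.LightningModule):  # hypothetical module
    def configure_optimizers(self):
        # exactly what Lightning previously did when this hook was not overridden
        return Adam(self.parameters(), lr=1e-3)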
2 changes: 1 addition & 1 deletion pytorch_lightning/trainer/distrib_data_parallel.py
@@ -305,7 +305,7 @@ def ddp_train(self, gpu_idx, model):

# CHOOSE OPTIMIZER
# allow for lr schedulers as well
self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers())
self.optimizers, self.lr_schedulers = self.init_optimizers(model)

# MODEL
# copy model to each gpu
6 changes: 3 additions & 3 deletions pytorch_lightning/trainer/distrib_parts.py
@@ -459,7 +459,7 @@ def single_gpu_train(self, model):

# CHOOSE OPTIMIZER
# allow for lr schedulers as well
self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers())
self.optimizers, self.lr_schedulers = self.init_optimizers(model)

if self.use_amp:
# An example
@@ -485,7 +485,7 @@ def tpu_train(self, tpu_core_idx, model):

# CHOOSE OPTIMIZER
# allow for lr schedulers as well
self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers())
self.optimizers, self.lr_schedulers = self.init_optimizers(model)

# init 16 bit for TPU
if self.precision == 16:
@@ -504,7 +504,7 @@ def dp_train(self, model):

# CHOOSE OPTIMIZER
# allow for lr schedulers as well
self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers())
self.optimizers, self.lr_schedulers = self.init_optimizers(model)

model.cuda(self.root_gpu)

102 changes: 102 additions & 0 deletions pytorch_lightning/trainer/optimizers.py
@@ -0,0 +1,102 @@
import warnings
from abc import ABC
from typing import List, Tuple

import torch
from torch import optim
from torch.optim.optimizer import Optimizer

from pytorch_lightning.core.lightning import LightningModule


class TrainerOptimizersMixin(ABC):

    def init_optimizers(
            self,
            model: LightningModule
    ) -> Tuple[List, List]:
        optimizers = model.configure_optimizers()

        if optimizers is None:
            warnings.warn('`LightningModule.configure_optimizers` was not overridden or returned `None`; '
                          'this fit will run with no optimizer', UserWarning)
            optimizers = _MockOptimizer()

        # single output, single optimizer
        if isinstance(optimizers, Optimizer):
            return [optimizers], []

        # two lists, optimizer + lr schedulers
        elif len(optimizers) == 2 and isinstance(optimizers[0], list):
            optimizers, lr_schedulers = optimizers
            lr_schedulers = self.configure_schedulers(lr_schedulers)
            return optimizers, lr_schedulers

        # single list or tuple, multiple optimizers
        elif isinstance(optimizers, (list, tuple)):
            return optimizers, []

        # unknown configuration
        else:
            raise ValueError('Unknown configuration for model optimizers. Output '
                             'from model.configure_optimizers() should either be: '
                             '* single output, single torch.optim.Optimizer '
                             '* single output, list of torch.optim.Optimizer '
                             '* two outputs, the first being a list of torch.optim.Optimizer, '
                             'the second being a list of torch.optim.lr_scheduler')

    def configure_schedulers(self, schedulers: list):
        # Convert each scheduler into a dict structure with the relevant information
        lr_schedulers = []
        default_config = {'interval': 'epoch',  # default every epoch
                          'frequency': 1,  # default every epoch/batch
                          'reduce_on_plateau': False,  # most often not ReduceLROnPlateau scheduler
                          'monitor': 'val_loss'}  # default value to monitor for ReduceLROnPlateau
        for scheduler in schedulers:
            if isinstance(scheduler, dict):
                if 'scheduler' not in scheduler:
                    raise ValueError('Lr scheduler should have key `scheduler` '
                                     'with item being a lr scheduler')
                scheduler['reduce_on_plateau'] = isinstance(
                    scheduler['scheduler'], optim.lr_scheduler.ReduceLROnPlateau)

                lr_schedulers.append({**default_config, **scheduler})

            elif isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                lr_schedulers.append({**default_config, 'scheduler': scheduler,
                                      'reduce_on_plateau': True})

            elif isinstance(scheduler, optim.lr_scheduler._LRScheduler):
                lr_schedulers.append({**default_config, 'scheduler': scheduler})
            else:
                raise ValueError(f'Input {scheduler} to lr schedulers '
                                 'is an invalid input.')
        return lr_schedulers
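Illustrative only, not part of the new file: the dict branch above corresponds to a user returning something like the following from `configure_optimizers`. Only the 'scheduler' key is required; the remaining keys fall back to `default_config`, and `reduce_on_plateau` is detected from the scheduler type.

from torch import optim

def configure_optimizers(self):  # sketch; assumes a LightningModule context
    opt = optim.SGD(self.parameters(), lr=0.1)
    sched = {
        'scheduler': optim.lr_scheduler.ReduceLROnPlateau(opt),
        'monitor': 'val_loss',  # metric ReduceLROnPlateau watches
        'interval': 'epoch',    # step the scheduler once per epoch
        'frequency': 1,
    }
    return [opt], [sched]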


class _MockOptimizer(Optimizer):
    """The `_MockOptimizer` will be used in place of an optimizer in the event that `None`
    is returned from `configure_optimizers`.
    """

    def __init__(self):
        super().__init__([torch.zeros(1)], {})

    def add_param_group(self, param_group):
        pass  # Do Nothing

    def load_state_dict(self, state_dict):
        pass  # Do Nothing

    def state_dict(self):
        return {}  # Return Empty

    def step(self, closure=None):
        if closure is not None:
            closure()

    def zero_grad(self):
        pass  # Do Nothing

    def __repr__(self):
        return 'No Optimizer'
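A quick behavioural sketch of the mock (illustrative, not a test from this PR): every method is a no-op except step, which still executes a supplied closure, so the existing training loop keeps working unchanged.

from pytorch_lightning.trainer.optimizers import _MockOptimizer

opt = _MockOptimizer()
opt.zero_grad()                                  # does nothing
opt.step(closure=lambda: print('closure ran'))   # prints 'closure ran'
print(opt.state_dict())                          # {}
print(opt)                                       # No Optimizer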
62 changes: 3 additions & 59 deletions pytorch_lightning/trainer/trainer.py
@@ -9,8 +9,6 @@
import torch
import torch.distributed as torch_distrib
import torch.multiprocessing as mp
from torch import optim
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

@@ -30,6 +28,7 @@
from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
from pytorch_lightning.trainer.optimizers import TrainerOptimizersMixin
from pytorch_lightning.trainer.training_io import TrainerIOMixin
from pytorch_lightning.trainer.training_loop import TrainerTrainLoopMixin
from pytorch_lightning.trainer.training_tricks import TrainerTrainingTricksMixin
@@ -54,6 +53,7 @@

class Trainer(
TrainerIOMixin,
TrainerOptimizersMixin,
TrainerDPMixin,
TrainerDDPMixin,
TrainerLoggingMixin,
@@ -714,7 +714,7 @@ def fit(

# CHOOSE OPTIMIZER
# allow for lr schedulers as well
self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers())
self.optimizers, self.lr_schedulers = self.init_optimizers(model)

self.run_pretrain_routine(model)

@@ -758,62 +758,6 @@ def __attach_dataloaders(self, model, train_dataloader, val_dataloaders, test_da

model.test_dataloader = _PatchDataLoader(test_dataloaders)

def init_optimizers(
self,
optimizers: Union[Optimizer, Tuple[List, List], List[Optimizer], Tuple[Optimizer]]
) -> Tuple[List, List]:

# single output, single optimizer
if isinstance(optimizers, Optimizer):
return [optimizers], []

# two lists, optimizer + lr schedulers
elif len(optimizers) == 2 and isinstance(optimizers[0], list):
optimizers, lr_schedulers = optimizers
lr_schedulers = self.configure_schedulers(lr_schedulers)
return optimizers, lr_schedulers

# single list or tuple, multiple optimizer
elif isinstance(optimizers, (list, tuple)):
return optimizers, []

# unknown configuration
else:
raise ValueError('Unknown configuration for model optimizers. Output'
'from model.configure_optimizers() should either be:'
'* single output, single torch.optim.Optimizer'
'* single output, list of torch.optim.Optimizer'
'* two outputs, first being a list of torch.optim.Optimizer',
'second being a list of torch.optim.lr_scheduler')

def configure_schedulers(self, schedulers: list):
# Convert each scheduler into dict sturcture with relevant information
lr_schedulers = []
default_config = {'interval': 'epoch', # default every epoch
'frequency': 1, # default every epoch/batch
'reduce_on_plateau': False, # most often not ReduceLROnPlateau scheduler
'monitor': 'val_loss'} # default value to monitor for ReduceLROnPlateau
for scheduler in schedulers:
if isinstance(scheduler, dict):
if 'scheduler' not in scheduler:
raise ValueError(f'Lr scheduler should have key `scheduler`',
' with item being a lr scheduler')
scheduler['reduce_on_plateau'] = isinstance(
scheduler['scheduler'], optim.lr_scheduler.ReduceLROnPlateau)

lr_schedulers.append({**default_config, **scheduler})

elif isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
lr_schedulers.append({**default_config, 'scheduler': scheduler,
'reduce_on_plateau': True})

elif isinstance(scheduler, optim.lr_scheduler._LRScheduler):
lr_schedulers.append({**default_config, 'scheduler': scheduler})
else:
raise ValueError(f'Input {scheduler} to lr schedulers '
'is a invalid input.')
return lr_schedulers

def run_pretrain_routine(self, model: LightningModule):
"""Sanity check a few things before starting actual training.

3 changes: 2 additions & 1 deletion tests/base/__init__.py
@@ -25,7 +25,8 @@
LightTestOptimizerWithSchedulingMixin,
LightTestMultipleOptimizersWithSchedulingMixin,
LightTestOptimizersWithMixedSchedulingMixin,
LightTestReduceLROnPlateauMixin
LightTestReduceLROnPlateauMixin,
LightTestNoneOptimizerMixin
)


5 changes: 5 additions & 0 deletions tests/base/mixins.py
@@ -688,6 +688,11 @@ def configure_optimizers(self):
return [optimizer], [lr_scheduler]


class LightTestNoneOptimizerMixin:
    def configure_optimizers(self):
        return None


def _get_output_metric(output, name):
if isinstance(output, dict):
val = output[name]
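For reference, a hypothetical test model could pick up the new mixin the same way the other optimizer mixins are used; the composition below is a sketch, and the remaining hooks (training step, dataloaders) would come from other test base classes not shown in this diff.

import pytorch_lightning as pl
from tests.base.mixins import LightTestNoneOptimizerMixin


class NoneOptimizerTestModel(LightTestNoneOptimizerMixin, pl.LightningModule):
    """Sketch only: the mixin is listed first so its configure_optimizers wins in the MRO."""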
2 changes: 1 addition & 1 deletion tests/base/utils.py
@@ -82,7 +82,7 @@ def run_model_test(trainer_options, model, on_gpu=True):
if trainer.use_ddp or trainer.use_ddp2:
# on hpc this would work fine... but need to hack it for the purpose of the test
trainer.model = pretrained_model
trainer.optimizers, trainer.lr_schedulers = trainer.init_optimizers(pretrained_model.configure_optimizers())
trainer.optimizers, trainer.lr_schedulers = trainer.init_optimizers(pretrained_model)

# test HPC loading / saving
trainer.hpc_save(save_dir, logger)
35 changes: 0 additions & 35 deletions tests/models/test_gpu.py
@@ -87,41 +87,6 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
assert result == 1, "DDP doesn't work with dataloaders passed to fit()."


def test_optimizer_return_options():
tutils.reset_seed()

trainer = Trainer()
model, hparams = tutils.get_default_model()

# single optimizer
opt_a = torch.optim.Adam(model.parameters(), lr=0.002)
opt_b = torch.optim.SGD(model.parameters(), lr=0.002)
optim, lr_sched = trainer.init_optimizers(opt_a)
assert len(optim) == 1 and len(lr_sched) == 0

# opt tuple
opts = (opt_a, opt_b)
optim, lr_sched = trainer.init_optimizers(opts)
assert len(optim) == 2 and optim[0] == opts[0] and optim[1] == opts[1]
assert len(lr_sched) == 0

# opt list
opts = [opt_a, opt_b]
optim, lr_sched = trainer.init_optimizers(opts)
assert len(optim) == 2 and optim[0] == opts[0] and optim[1] == opts[1]
assert len(lr_sched) == 0

# opt tuple of lists
scheduler = torch.optim.lr_scheduler.StepLR(opt_a, 10)
opts = ([opt_a], [scheduler])
optim, lr_sched = trainer.init_optimizers(opts)
assert len(optim) == 1 and len(lr_sched) == 1
assert optim[0] == opts[0][0] and \
lr_sched[0] == dict(scheduler=scheduler, interval='epoch',
frequency=1, reduce_on_plateau=False,
monitor='val_loss')


def test_cpu_slurm_save_load(tmpdir):
"""Verify model save/load/checkpoint on CPU."""
tutils.reset_seed()