Commit

Merge branch 'master' into master
Borda committed Jan 12, 2023
2 parents c44a892 + 0876a64 commit 77043b0
Showing 48 changed files with 174 additions and 656 deletions.
4 changes: 2 additions & 2 deletions .azure/app-cloud-e2e.yml
@@ -176,7 +176,7 @@ jobs:
#LAI_PASS: $(LAI_PASS) # for STAGING
LIGHTNING_USER_ID: $(LIGHTNING_USER_ID_PROD)
LIGHTNING_API_KEY: $(LIGHTNING_API_KEY_PROD)
LIGHTNING_USERNAME: $(LIGHTNING_USERNAME)
LIGHTNING_USERNAME: $(LIGHTNING_USERNAME_PROD)
LIGHTNING_CLOUD_URL: $(LIGHTNING_CLOUD_URL_PROD)
displayName: 'Run the tests'
@@ -196,7 +196,7 @@ jobs:
#LAI_PASS: $(LAI_PASS) # for STAGING
LIGHTNING_USER_ID: $(LIGHTNING_USER_ID_PROD)
LIGHTNING_API_KEY: $(LIGHTNING_API_KEY_PROD)
LIGHTNING_USERNAME: $(LIGHTNING_USERNAME)
LIGHTNING_USERNAME: $(LIGHTNING_USERNAME_PROD)
LIGHTNING_CLOUD_URL: $(LIGHTNING_CLOUD_URL_PROD)
timeoutInMinutes: "3"
displayName: 'Clean Previous Apps'
47 changes: 0 additions & 47 deletions docs/source-pytorch/ecosystem/transformers.rst

This file was deleted.

1 change: 0 additions & 1 deletion docs/source-pytorch/fabric/api/api_reference.rst
@@ -124,7 +124,6 @@ Strategies
Strategy
DDPStrategy
DataParallelStrategy
DDPShardedStrategy
FSDPStrategy
ParallelStrategy
SingleDeviceStrategy
7 changes: 2 additions & 5 deletions docs/source-pytorch/fabric/api/fabric_args.rst
@@ -8,7 +8,7 @@ Fabric Arguments
accelerator
===========

Choose one of ``"cpu"``, ``"gpu"``, ``"tpu"``, ``"auto"`` (IPU support is coming soon).
Choose one of ``"cpu"``, ``"gpu"``, ``"tpu"``, ``"auto"``.

.. code-block:: python
@@ -35,7 +35,7 @@ The ``"auto"`` option recognizes the machine you are on and selects the availabl
strategy
========

Choose a training strategy: ``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"tpu_spawn"``, ``"deepspeed"``, ``"ddp_sharded"``, or ``"ddp_sharded_spawn"``.
Choose a training strategy: ``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"xla"``, ``"deepspeed"``, ``"fsdp"``.

.. code-block:: python
@@ -55,9 +55,6 @@ Additionally, you can pass in your custom strategy by configuring additional par
fabric = Fabric(strategy=DeepSpeedStrategy(stage=2), accelerator="gpu", devices=2)
Support for Fully Sharded training strategies are coming soon.


devices
=======

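For orientation, here is a minimal sketch of how the flags documented above are passed to `Fabric` after this change (a sketch assuming the `lightning_fabric` package layout shown in this diff and a machine with at least two GPUs; `"fsdp"` takes over from the removed `"ddp_sharded"`/`"ddp_sharded_spawn"` options):

```python
from lightning_fabric import Fabric

# accelerator: "cpu", "gpu", "tpu", or "auto"; strategy: one of the names
# listed above -- "fsdp" replaces the removed sharded-DDP flags.
fabric = Fabric(accelerator="gpu", devices=2, strategy="fsdp")
fabric.launch()
```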
4 changes: 2 additions & 2 deletions requirements/app/components.txt
@@ -1,5 +1,5 @@
# deps required by components in the lightning app repository (src/lightning_app/components)
lightning_api_access>=0.0.3 # serve
aiohttp>=3.8.0, <=3.8.3 # auto_scaler
# lightning_fabric # multinode # uncomment when released. it's okay to comment for now because pl includes it
pytorch_lightning # multinode
# lightning-fabric>=1.9.0 # multinode # uncomment when released. it's okay to comment for now because pl includes it
pytorch-lightning>1.8.0 # multinode
2 changes: 0 additions & 2 deletions requirements/fabric/strategies.txt
@@ -1,5 +1,3 @@
# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

fairscale>=0.4.5, <0.4.13
deepspeed>=0.6.0, <=0.7.0
1 change: 0 additions & 1 deletion src/lightning_app/components/multi_node/lite.py
@@ -40,7 +40,6 @@ def run(
try:
pkg = importlib.import_module(pkg_name)
fabrics.append(pkg.Fabric)
strategies.append(pkg.strategies.DDPShardedStrategy)
strategies.append(pkg.strategies.DDPStrategy)
mps_accelerators.append(pkg.accelerators.MPSAccelerator)
except (ImportError, ModuleNotFoundError):
2 changes: 1 addition & 1 deletion src/lightning_fabric/CHANGELOG.md
@@ -65,7 +65,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Removed

-
- Removed support for FairScale's sharded training (`strategy='ddp_sharded'|'ddp_sharded_spawn'`). Use Fully-Sharded Data Parallel instead (`strategy='fsdp'`) ([#16329](https://github.com/Lightning-AI/lightning/pull/16329))

### Fixed

24 changes: 2 additions & 22 deletions src/lightning_fabric/connector.py
@@ -43,8 +43,6 @@
from lightning_fabric.plugins.precision.fsdp import FSDPPrecision
from lightning_fabric.plugins.precision.precision import _PRECISION_INPUT, _PRECISION_INPUT_INT, _PRECISION_INPUT_STR
from lightning_fabric.strategies import (
DDPShardedStrategy,
DDPStrategy,
DeepSpeedStrategy,
SingleDeviceStrategy,
SingleTPUStrategy,
@@ -54,7 +52,7 @@
)
from lightning_fabric.strategies.ddp import _DDP_FORK_ALIASES
from lightning_fabric.strategies.fsdp import _FSDP_ALIASES, FSDPStrategy
from lightning_fabric.utilities import _StrategyType, rank_zero_info, rank_zero_warn
from lightning_fabric.utilities import rank_zero_info, rank_zero_warn
from lightning_fabric.utilities.device_parser import _determine_root_gpu_device
from lightning_fabric.utilities.imports import _IS_INTERACTIVE

@@ -516,7 +514,7 @@ def _lazy_init_strategy(self) -> None:
raise RuntimeError(
f"`Fabric(strategy={self._strategy_flag!r})` is not compatible with an interactive"
" environment. Run your code as a script, or choose one of the compatible strategies:"
f" Fabric(strategy=None|{'|'.join(_StrategyType.interactive_compatible_types())})."
f" `Fabric(strategy=None|'dp'|'ddp_notebook')`."
" In case you are spawning processes yourself, make sure to include the Fabric"
" creation inside the worker function."
)
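The updated error message above names the strategies that still work interactively. A hedged sketch of what that looks like in a notebook (assuming a Jupyter session on a multi-GPU machine; `"ddp_notebook"` is one of the aliases listed in the message):

```python
from lightning_fabric import Fabric

# Inside a notebook, spawn/script-only strategies trigger the RuntimeError
# above; strategy=None, "dp", or "ddp_notebook" remain compatible.
fabric = Fabric(strategy="ddp_notebook", accelerator="gpu", devices=2)
fabric.launch()
```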
@@ -547,21 +545,3 @@ def _argument_from_env(name: str, current: Any, default: Any) -> Any:
if env_value is None:
return current
return env_value

@property
def is_distributed(self) -> bool:
# TODO: deprecate this property
# Used for custom plugins.
# Custom plugins should implement is_distributed property.
if hasattr(self.strategy, "is_distributed") and not isinstance(self.accelerator, TPUAccelerator):
return self.strategy.is_distributed
distributed_strategy = (
DDPStrategy,
DDPShardedStrategy,
DeepSpeedStrategy,
XLAStrategy,
)
is_distributed = isinstance(self.strategy, distributed_strategy)
if isinstance(self.accelerator, TPUAccelerator):
is_distributed |= self.strategy.is_distributed
return is_distributed
26 changes: 6 additions & 20 deletions src/lightning_fabric/fabric.py
@@ -16,7 +16,7 @@
from contextlib import contextmanager, nullcontext
from functools import partial
from pathlib import Path
from typing import Any, Callable, cast, Dict, Generator, List, Optional, overload, Sequence, Tuple, Union
from typing import Any, Callable, cast, Dict, Generator, List, Mapping, Optional, overload, Sequence, Tuple, Union

import torch
import torch.nn as nn
@@ -32,14 +32,7 @@
from lightning_fabric.plugins import Precision # avoid circular imports: # isort: split
from lightning_fabric.accelerators.accelerator import Accelerator
from lightning_fabric.connector import _Connector, _PLUGIN_INPUT, _PRECISION_INPUT
from lightning_fabric.strategies import (
DDPShardedStrategy,
DeepSpeedStrategy,
FSDPStrategy,
SingleDeviceStrategy,
Strategy,
XLAStrategy,
)
from lightning_fabric.strategies import DeepSpeedStrategy, FSDPStrategy, SingleDeviceStrategy, Strategy, XLAStrategy
from lightning_fabric.strategies.strategy import _Sharded, TBroadcast
from lightning_fabric.utilities import move_data_to_device
from lightning_fabric.utilities.apply_func import convert_tensors_to_scalars, convert_to_tensors
@@ -69,7 +62,7 @@ class Fabric:
accelerator: The hardware to run on. Possible choices are:
``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
strategy: Strategy for how to run across multiple devices. Possible choices are:
``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"deepspeed"``, ``"ddp_sharded"``.
``"dp"``, ``"ddp"``, ``"ddp_spawn"``, ``"deepspeed"``, ``"fsdp"``.
devices: Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
The value applies per node.
num_nodes: Number of GPU nodes for distributed training.
@@ -604,7 +597,7 @@ def log(self, name: str, value: Any, step: Optional[int] = None) -> None:
"""
self.log_dict(metrics={name: value}, step=step)

def log_dict(self, metrics: Dict[str, Any], step: Optional[int] = None) -> None:
def log_dict(self, metrics: Mapping[str, Any], step: Optional[int] = None) -> None:
"""Log multiple scalars at once to all loggers that were added to Fabric.
Args:
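Since `log_dict` now accepts any `Mapping` rather than only a `dict`, read-only mappings can be passed as well. A small sketch (metric names are made up; with no loggers attached the call is a no-op):

```python
from types import MappingProxyType

from lightning_fabric import Fabric

fabric = Fabric(accelerator="cpu")

# Any Mapping works now, not just a plain dict.
metrics = MappingProxyType({"train_loss": 0.42, "lr": 1e-3})
fabric.log_dict(metrics, step=10)
```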
@@ -673,7 +666,7 @@ def _move_model_to_device(self, model: nn.Module, optimizers: List[Optimizer]) -

def _requires_distributed_sampler(self, dataloader: DataLoader) -> bool:
return (
self._connector.is_distributed
getattr(self.strategy, "distributed_sampler_kwargs", None) is not None
and not isinstance(dataloader.sampler, DistributedSampler)
and not has_iterable_dataset(dataloader)
)
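The replacement above moves the decision from the connector's removed `is_distributed` flag to the strategy itself: a `DistributedSampler` is injected only when the strategy advertises `distributed_sampler_kwargs`. A standalone sketch of that rule (hypothetical helper name, iterable-dataset guard omitted):

```python
from torch.utils.data import DataLoader, DistributedSampler


def needs_distributed_sampler(strategy, dataloader: DataLoader) -> bool:
    # Strategies that expose the kwargs needed to build a sampler
    # (num_replicas/rank) opt in; DataParallelStrategy returns None and
    # therefore opts out.
    kwargs = getattr(strategy, "distributed_sampler_kwargs", None)
    return kwargs is not None and not isinstance(dataloader.sampler, DistributedSampler)
```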
@@ -713,15 +706,8 @@ def _validate_setup_module(self, module: nn.Module) -> None:
if isinstance(module, _FabricModule):
raise ValueError("A model should be passed only once to the `setup_module` method.")

if isinstance(self._strategy, DDPShardedStrategy):
raise RuntimeError(
f"The `{type(self._strategy).__name__}` requires the model and optimizer(s) to be set up jointly"
" through `.setup(model, optimizer, ...)`. For inference, choose a different strategy, for example"
" `ddp`."
)

def _validate_setup_optimizers(self, optimizers: Sequence[Optimizer]) -> None:
if isinstance(self._strategy, (DeepSpeedStrategy, DDPShardedStrategy, XLAStrategy)):
if isinstance(self._strategy, (DeepSpeedStrategy, XLAStrategy)):
raise RuntimeError(
f"The `{type(self._strategy).__name__}` requires the model and optimizer(s) to be set up jointly"
" through `.setup(model, optimizer, ...)`."
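For reference, a hedged sketch of the two setup styles that `_validate_setup_optimizers` distinguishes after this change (assuming a CPU run so it executes without special hardware; DeepSpeed and XLA still require the joint form):

```python
import torch

from lightning_fabric import Fabric

fabric = Fabric(accelerator="cpu")
fabric.launch()

model = torch.nn.Linear(32, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Joint setup: required for DeepSpeedStrategy and XLAStrategy.
model, optimizer = fabric.setup(model, optimizer)

# Split setup: allowed for the other strategies now that the
# DDPShardedStrategy-specific restriction is gone.
# model = fabric.setup_module(model)
# optimizer = fabric.setup_optimizers(optimizer)
```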
1 change: 0 additions & 1 deletion src/lightning_fabric/strategies/__init__.py
@@ -14,7 +14,6 @@
from lightning_fabric.strategies.ddp import DDPStrategy # noqa: F401
from lightning_fabric.strategies.deepspeed import DeepSpeedStrategy # noqa: F401
from lightning_fabric.strategies.dp import DataParallelStrategy # noqa: F401
from lightning_fabric.strategies.fairscale import DDPShardedStrategy # noqa: F401
from lightning_fabric.strategies.fsdp import FSDPStrategy # noqa: F401
from lightning_fabric.strategies.parallel import ParallelStrategy # noqa: F401
from lightning_fabric.strategies.registry import _call_register_strategies, _StrategyRegistry
4 changes: 0 additions & 4 deletions src/lightning_fabric/strategies/ddp.py
@@ -81,10 +81,6 @@ def root_device(self) -> torch.device:
assert self.parallel_devices is not None
return self.parallel_devices[self.local_rank]

@property
def is_distributed(self) -> bool:
return True

@property
def num_nodes(self) -> int:
return self._num_nodes
4 changes: 4 additions & 0 deletions src/lightning_fabric/strategies/dp.py
@@ -50,6 +50,10 @@ def root_device(self) -> torch.device:
assert self.parallel_devices is not None
return self.parallel_devices[0]

@property
def distributed_sampler_kwargs(self) -> None:
return None

def setup_module(self, module: Module) -> DataParallel:
"""Wraps the given model into a :class:`~torch.nn.parallel.DataParallel` module."""
return DataParallel(module=module, device_ids=self.parallel_devices)
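A short sketch of what the new property means in practice (class name taken from this diff; constructing the strategy directly is only for illustration):

```python
from lightning_fabric.strategies import DataParallelStrategy

# Returning None signals that no DistributedSampler should be injected for
# DP: a single process sees the full dataset.
strategy = DataParallelStrategy()
assert strategy.distributed_sampler_kwargs is None
```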
