diff --git a/.actions/setup_tools.py b/.actions/setup_tools.py index 5088be2020738..a76e81246798c 100644 --- a/.actions/setup_tools.py +++ b/.actions/setup_tools.py @@ -94,11 +94,10 @@ def load_readme_description(path_dir: str, homepage: str, version: str) -> str: text = text.replace("pytorch-lightning.readthedocs.io/en/stable/", f"pytorch-lightning.readthedocs.io/en/{version}") # codecov badge text = text.replace("/branch/master/graph/badge.svg", f"/release/{version}/graph/badge.svg") - # replace github badges for release ones + # github actions badge text = text.replace("badge.svg?branch=master&event=push", f"badge.svg?tag={version}") - # Azure... + # azure pipelines badge text = text.replace("?branchName=master", f"?branchName=refs%2Ftags%2F{version}") - text = re.sub(r"\?definitionId=\d+&branchName=master", f"?definitionId=2&branchName=refs%2Ftags%2F{version}", text) skip_begin = r"" skip_end = r"" diff --git a/.azure/gpu-benchmark.yml b/.azure/gpu-benchmark.yml index ac5ca6f60a6b4..0de590f2c54a6 100644 --- a/.azure/gpu-benchmark.yml +++ b/.azure/gpu-benchmark.yml @@ -28,7 +28,7 @@ jobs: cancelTimeoutInMinutes: "2" pool: azure-jirka-spot container: - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.3.1" options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g" workspace: clean: all diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index f37c17613affc..683212cd55d4b 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: 'PyTorch - stable': - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.3.1" # how long to run the job before automatically cancelling timeoutInMinutes: "80" # how much time to give 'run always even if cancelled tasks' before stopping them @@ -44,7 +44,7 @@ jobs: - bash: | CHANGED_FILES=$(git diff --name-status origin/master -- . 
| awk '{print $2}') - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.azure/*' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*' echo $CHANGED_FILES > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES @@ -75,7 +75,7 @@ jobs: CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" pip install -e .[strategies] - pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded + pip install -U deepspeed # TODO: remove when docker images are upgraded pip install --requirement requirements/pytorch/devel.txt pip list env: @@ -119,15 +119,6 @@ jobs: timeoutInMinutes: "35" condition: eq(variables['continue'], '1') - - bash: bash run_standalone_tasks.sh - workingDirectory: tests/tests_pytorch - env: - PL_USE_MOCKED_MNIST: "1" - PL_RUN_CUDA_TESTS: "1" - displayName: 'Testing: PyTorch standalone tasks' - timeoutInMinutes: "10" - condition: eq(variables['continue'], '1') - - bash: | python -m coverage report python -m coverage xml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 0fe790310f247..f71844e9664fe 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,8 +1,14 @@ blank_issues_enabled: false contact_links: - - name: Ask a Question + - name: ❓ Ask a Question url: https://github.com/Lightning-AI/lightning/discussions/new - about: Ask and answer Lightning related questions - - name: 💬 Slack + about: Ask and answer Lightning related questions. + - name: 💬 Chat with us url: https://www.pytorchlightning.ai/community - about: Chat with our community + about: Live chat with experts, engineers, and users in our Slack community. + - name: 📖 Read the documentation + url: https://lightning.ai/lightning-docs/ + about: Please consult the documentation before opening any issues! + - name: 🙋 Contact us about professional services + url: https://lightning.ai + about: Contact the Lightning.ai sales team for paid support. 
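For context on the .azure/gpu-tests.yml hunk above: the job collects the files changed against origin/master, greps them against FILTER, and only sets `continue=1` (so the later test steps run) when at least one relevant path matched. Below is a rough Python sketch of that gate, not the pipeline's actual script; the `changed_files` list is invented for illustration, and `re.search` stands in for `grep -E`.

```python
import re

# Same extended-regex filter as the updated .azure/gpu-tests.yml step
# (the `|.azure/*` alternative has been dropped from the pattern).
FILTER = r"src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*"

# Hypothetical changed paths; in the pipeline this list comes from
# `git diff --name-status origin/master -- . | awk '{print $2}'`.
changed_files = [
    "docs/source-pytorch/conf.py",
    "src/pytorch_lightning/trainer/trainer.py",
]

# `grep -E` keeps any line that matches anywhere in the line, so re.search is the analogue.
matches = [path for path in changed_files if re.search(FILTER, path)]
continue_flag = "1" if matches else "0"  # later steps are conditioned on this value
print(matches, continue_flag)
```

The GitHub workflows edited later in this diff (ci-pytorch-test-full.yml and ci-pytorch-test-slow.yml) use the same pattern-based skip, and their FILTER is narrowed similarly: each drops its own workflow path from the pattern.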
diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml new file mode 100644 index 0000000000000..c2654eddd7ca1 --- /dev/null +++ b/.github/checkgroup.yml @@ -0,0 +1,173 @@ +custom_service_name: "Lightning CI required checker" +subprojects: + - id: "CI: CircleCI" + paths: + - ".circleci/**" + checks: + - "test-on-tpus" + + - id: "CI: Azure" + paths: + - ".azure/**" + checks: + - "pytorch-lightning (GPUs)" + - "pytorch-lightning (GPUs) (testing PyTorch - stable)" + - "pytorch-lightning (HPUs)" + - "pytorch-lightning (IPUs)" + + - id: "pytorch_lightning" + paths: + # all examples don't need to be added because they aren't used in CI, but these are + - "examples/run_ddp_examples.sh" + - "examples/convert_from_pt_to_pl/**" + - "examples/run_pl_examples.sh" + - "examples/pl_basics/backbone_image_classifier.py" + - "examples/pl_basics/autoencoder.py" + - "examples/pl_loops/mnist_lite.py" + - "examples/pl_fault_tolerant/automatic.py" + - "examples/test_pl_examples.py" + - "examples/pl_integrations/dali_image_classifier.py" + - "requirements/pytorch/**" + - "src/pytorch_lightning/**" + - "tests/tests_pytorch/**" + - "setup.cfg" # includes pytest config + - ".github/workflows/ci-pytorch*.yml" + - ".github/workflows/docs-*.yml" + checks: + - "conda (3.8, 1.10)" + - "conda (3.8, 1.9)" + - "conda (3.9, 1.11)" + - "conda (3.9, 1.12)" + - "cpu (macOS-11, 3.10, latest, stable)" + - "cpu (macOS-11, 3.7, latest, stable)" + - "cpu (macOS-11, 3.7, oldest, stable)" + - "cpu (ubuntu-20.04, 3.10, latest, stable)" + - "cpu (ubuntu-20.04, 3.7, latest, stable)" + - "cpu (ubuntu-20.04, 3.7, oldest, stable)" + - "cpu (windows-2022, 3.10, latest, stable)" + - "cpu (windows-2022, 3.7, latest, stable)" + - "cpu (windows-2022, 3.7, oldest, stable)" + - "doctest (pytorch)" + - "make-docs (pytorch)" + - "mypy" + - "PR Gatekeeper (pytorch)" + - "pytorch-lightning (GPUs)" + - "pytorch-lightning (GPUs) (testing PyTorch - stable)" + - "pytorch-lightning (HPUs)" + - "pytorch-lightning (IPUs)" + - "slow (macOS-11, 3.7, 1.11)" + - "slow (ubuntu-20.04, 3.7, 1.11)" + - "slow (windows-2022, 3.7, 1.11)" + - "test-on-tpus" + + - id: "pytorch_lightning: Docs" + paths: + - "docs/source-pytorch/**" + - ".github/workflows/docs-*.yml" + - "requirements/docs.txt" + - "requirements/pytorch/**" + checks: + - "doctest (pytorch)" + - "make-docs (pytorch)" + + - id: "pytorch_lightning: Docker" + paths: + - "dockers/**" + - "!dockers/README.md" + - "requirements.txt" + - "requirements/*.txt" + - "requirements/pytorch/*" + - "environment.yml" + - ".github/workflows/*docker*.yml" + - "setup.py" + checks: + - "build-conda (3.8, 1.10)" + - "build-conda (3.8, 1.9)" + - "build-conda (3.9, 1.11)" + - "build-conda (3.9, 1.12)" + - "build-cuda (3.8, 1.9, 11.1.1)" + - "build-cuda (3.9, 1.10, 11.3.1)" + - "build-cuda (3.9, 1.11, 11.3.1)" + - "build-cuda (3.9, 1.12, 11.3.1)" + - "build-cuda (3.9, 1.9, 11.1.1)" + - "build-hpu (1.5.0, 1.11.0)" + - "build-ipu (3.9, 1.9)" + - "build-NGC" + - "build-pl (3.9, 1.10, 11.3.1)" + - "build-pl (3.9, 1.11, 11.3.1)" + - "build-pl (3.9, 1.12, 11.3.1)" + - "build-pl (3.9, 1.9, 11.1.1)" + - "build-xla (3.7, 1.12)" + + - id: "pytorch_lightning: mypy" + paths: + - ".github/workflows/code-checks.yml" + - "pyproject.toml" # includes mypy config + checks: + - "mypy" + + - id: "lightning_app" + paths: + - ".github/workflows/ci-app*.yml" + - "requirements/app/**" + - "src/lightning_app/**" + - "tests/tests_app/**" + - "tests/tests_app_examples/**" + - "tests/tests_clusters/**" + # the examples are used in the app CI + - 
"examples/app_*" + checks: + - "Cloud Test (boring_app)" + - "Cloud Test (collect_failures)" + - "Cloud Test (commands_and_api)" + - "Cloud Test (custom_work_dependencies)" + - "Cloud Test (drive)" + - "Cloud Test (idle_timeout)" + - "Cloud Test (payload)" + - "Cloud Test (template_jupyterlab)" + - "Cloud Test (template_react_ui)" + - "Cloud Test (template_streamlit_ui)" + - "Cloud Test (v0_app)" + - "doctest (app)" + - "make-docs (app)" + - "pytest (macOS-11, 3.8, latest)" + - "pytest (macOS-11, 3.8, oldest)" + - "pytest (ubuntu-20.04, 3.8, latest)" + - "pytest (ubuntu-20.04, 3.8, oldest)" + - "pytest (windows-2022, 3.8, latest)" + - "pytest (windows-2022, 3.8, oldest)" + + - id: "lightning_app: Docs" + paths: + - "docs/source-app/**" + - ".github/workflows/docs-*.yml" + - "requirements/docs.txt" + - "requirements/app/**" + checks: + - "doctest (app)" + - "make-docs (app)" + + - id: "install" + paths: + - ".actions/setup_tools.py" + - ".github/workflows/ci-pkg-install.yml" + - "setup.py" + - "src/lightning/**" + # all __about__, __version__, __setup__ + - "src/*/__*.py" + checks: + - "install-meta-pypi (macOS-11, 3.8)" + - "install-meta-pypi (ubuntu-20.04, 3.8)" + - "install-meta-pypi (windows-2022, 3.8)" + - "install-meta-src (macOS-11, 3.8)" + - "install-meta-src (macOS-11, lightning, 3.8)" + - "install-meta-src (ubuntu-20.04, 3.8)" + - "install-meta-src (ubuntu-20.04, lightning, 3.8)" + - "install-meta-src (windows-2022, 3.8)" + - "install-meta-src (windows-2022, lightning, 3.8)" + - "install-standalone (macOS-11, app, 3.8)" + - "install-standalone (macOS-11, pytorch, 3.8)" + - "install-standalone (ubuntu-20.04, app, 3.8)" + - "install-standalone (ubuntu-20.04, pytorch, 3.8)" + - "install-standalone (windows-2022, app, 3.8)" + - "install-standalone (windows-2022, pytorch, 3.8)" diff --git a/.github/workflows/README.md b/.github/workflows/README.md index f559551e1237f..4ed903c0f3a93 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -4,16 +4,16 @@ ## Unit and Integration Testing -| workflow name | workflow file | action | accelerator\* | (Python, PyTorch) | OS | -| -------------------------- | ----------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | ------------------------------------------------ | ------------------- | -| Test full | .github/workflows/ci_test-full.yml | Run all tests except for accelerator-specific, standalone and slow tests. | CPU | (3.7, 1.9), (3.7, 1.12), (3.10, 1.12) | linux, mac, windows | -| Test with Conda | .github/workflows/ci_test-conda.yml | Same as ci_test-full.yml but with dependencies installed with conda. | CPU | (3.8, 1.8), (3.8, 1.9), (3.8, 1.10), (3.9, 1.12) | linux | -| Test slow | .github/workflows/ci_test-slow.yml | Run only slow tests. Slow tests usually need to spawn threads and cannot be speed up or simplified. | CPU | (3.7, 1.8) | linux, mac, windows | -| pytorch-lightning (IPUs) | .azure-pipelines/ipu-tests.yml | Run only IPU-specific tests. | IPU | (3.8, 1.9) | linux | -| pytorch-lightning (HPUs) | .azure-pipelines/hpu-tests.yml | Run only HPU-specific tests. | HPU | (3.8, 1.10) | linux | -| pytorch-lightning (GPUs) | .azure-pipelines/gpu-tests.yml | Run all CPU and GPU-specific tests, standalone, and examples. Each standalone test needs to be run in separate processes to avoid unwanted interactions between test cases. 
| GPU | (3.9, 1.12) | linux | -| PyTorchLightning.Benchmark | .azure-pipelines/gpu-benchmark.yml | Run speed/memory benchmarks for parity with pure PyTorch. | GPU | (3.9, 1.12) | linux | -| test-on-tpus | .circleci/config.yml | Run only TPU-specific tests. | TPU | (3.7, 1.12) | linux | +| workflow name | workflow file | action | accelerator\* | (Python, PyTorch) | OS | +| -------------------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | ------------------------------------------------- | ------------------- | +| Test PyTorch full | .github/workflows/ci-pytorch-test-full.yml | Run all tests except for accelerator-specific, standalone and slow tests. | CPU | (3.7, 1.9), (3.7, 1.12), (3.9, 1.9), (3.9, 1.12) | linux, mac, windows | +| Test PyTorch with Conda | .github/workflows/ci-pytorch-test-conda.yml | Same as ci-pytorch-test-full.yml but with dependencies installed with conda. | CPU | (3.8, 1.9), (3.8, 1.10), (3.8, 1.11), (3.9, 1.12) | linux | +| Test slow | .github/workflows/ci-pytorch-test-slow.yml | Run only slow tests. Slow tests usually need to spawn threads and cannot be speed up or simplified. | CPU | (3.7, 1.11) | linux, mac, windows | +| pytorch-lightning (IPUs) | .azure-pipelines/ipu-tests.yml | Run only IPU-specific tests. | IPU | (3.8, 1.9) | linux | +| pytorch-lightning (HPUs) | .azure-pipelines/hpu-tests.yml | Run only HPU-specific tests. | HPU | (3.8, 1.10) | linux | +| pytorch-lightning (GPUs) | .azure-pipelines/gpu-tests.yml | Run all CPU and GPU-specific tests, standalone, and examples. Each standalone test needs to be run in separate processes to avoid unwanted interactions between test cases. | GPU | (3.9, 1.12) | linux | +| PyTorchLightning.Benchmark | .azure-pipelines/gpu-benchmark.yml | Run speed/memory benchmarks for parity with pure PyTorch. | GPU | (3.9, 1.12) | linux | +| test-on-tpus | .circleci/config.yml | Run only TPU-specific tests. | TPU | (3.7, 1.12) | linux | - \*Accelerators used in CI - GPU: 2 x NVIDIA Tesla V100 @@ -33,15 +33,15 @@ | --------------------------------- | ----------------------------------------------------------------------------------------- | | .codecov.yml | Measure test coverage with [codecov.io](https://app.codecov.io/gh/Lightning-AI/lightning) | | .github/workflows/code-checks.yml | Check Python typing with [MyPy](https://mypy.readthedocs.io/en/stable/). | -| .github/workflows/ci_schema.yml | Validate the syntax of workflow files. | +| .github/workflows/ci-schema.yml | Validate the syntax of workflow files. | ## Others -| workflow file | action | -| -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| .github/workflows/ci_dockers.yml | Build docker images used for testing in CI without pushing to the [Docker Hub](https://hub.docker.com/r/pytorchlightning/pytorch_lightning). Publishing these built images takes place in `.github/workflows/release-docker.yml` which only runs in master. | -| .github/workflows/ci_pkg-install.yml | Test if pytorch-lightning is successfully installed using pip. 
| -| .github/workflows/events-recurrent.yml | Terminate TPU jobs that live more than one hour to avoid possible resource exhaustion due to hangs. | +| workflow file | action | +| ------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| .github/workflows/cicd-pytorch-dockers.yml | Build docker images used for testing in CI. If run on nightly schedule, push to the [Docker Hub](https://hub.docker.com/r/pytorchlightning/pytorch_lightning). | +| .github/workflows/ci-pkg-install.yml | Test if pytorch-lightning is successfully installed using pip. | +| .github/workflows/events-recurrent.yml | Terminate TPU jobs that live more than one hour to avoid possible resource exhaustion due to hangs. | ## Deployment @@ -60,4 +60,4 @@ | .github/stale.yml | Close inactive issues/PRs sometimes after adding the "won't fix" label to them. | | .github/workflows/probot-auto-cc.yml, .github/lightning-probot.yml | Notify maintainers of interest depending on labels added to an issue We utilize lightning-probot forked from PyTorch’s probot. | | .pre-commit-config.yaml | pre-commit.ci runs a set of linters and formatters, such as black, flake8 and isort. When formatting is applied, the bot pushes a commit with its change. This configuration is also used for running pre-commit locally. | -| .github/workflows/ci_pr-gatekeeper.yml | Prevent PRs from merging into master without any Grid.ai employees’ approval. | +| .github/workflows/ci-pr-gatekeeper.yml | Prevent PRs from merging into master without any Grid.ai employees’ approval. | diff --git a/.github/workflows/ci-app_cloud_e2e_test.yml b/.github/workflows/ci-app-cloud-e2e-test.yml similarity index 99% rename from .github/workflows/ci-app_cloud_e2e_test.yml rename to .github/workflows/ci-app-cloud-e2e-test.yml index 3ad455650a117..81d5e70441771 100644 --- a/.github/workflows/ci-app_cloud_e2e_test.yml +++ b/.github/workflows/ci-app-cloud-e2e-test.yml @@ -25,7 +25,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v2 with: python-version: "3.8" diff --git a/.github/workflows/ci-app_examples.yml b/.github/workflows/ci-app-examples.yml similarity index 98% rename from .github/workflows/ci-app_examples.yml rename to .github/workflows/ci-app-examples.yml index ec8becd5f70d1..01570f59c2c77 100644 --- a/.github/workflows/ci-app_examples.yml +++ b/.github/workflows/ci-app-examples.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macOS-11, windows-2019] + os: [ubuntu-20.04, macOS-11, windows-2022] python-version: [3.8] requires: ["oldest", "latest"] diff --git a/.github/workflows/ci-app_tests.yml b/.github/workflows/ci-app-tests.yml similarity index 96% rename from .github/workflows/ci-app_tests.yml rename to .github/workflows/ci-app-tests.yml index 1678dab257301..fe3cc36dc16d3 100644 --- a/.github/workflows/ci-app_tests.yml +++ b/.github/workflows/ci-app-tests.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macOS-11, windows-2019] + os: [ubuntu-20.04, macOS-11, windows-2022] python-version: [3.8] requires: ["oldest", "latest"] @@ -126,7 +126,7 @@ jobs: # - name: Clone Quick Start Example Repo # uses: actions/checkout@v3 # # TODO: this needs to be git submodule -# if: matrix.os == 'windows-2019' # because the install doesn't work on windows +# if: matrix.os == 'windows-2022' # 
because the install doesn't work on windows # with: # repository: Lightning-AI/lightning-quick-start # ref: 'main' @@ -134,6 +134,6 @@ jobs: # # - name: Lightning Install quick-start # shell: bash -# if: matrix.os != 'windows-2019' # because the install doesn't work on windows +# if: matrix.os != 'windows-2022' # because the install doesn't work on windows # run: | # python -m lightning install app lightning/quick-start -y diff --git a/.github/workflows/ci_pkg-install.yml b/.github/workflows/ci-pkg-install.yml similarity index 95% rename from .github/workflows/ci_pkg-install.yml rename to .github/workflows/ci-pkg-install.yml index 342e027b07cfe..a9fdd36693a67 100644 --- a/.github/workflows/ci_pkg-install.yml +++ b/.github/workflows/ci-pkg-install.yml @@ -33,7 +33,7 @@ jobs: fail-fast: true max-parallel: 1 matrix: - os: [ubuntu-20.04, macOS-11, windows-2019] + os: [ubuntu-20.04, macOS-11, windows-2022] pkg: ["app", "pytorch"] python-version: [3.8] # , 3.9 @@ -67,7 +67,7 @@ jobs: fail-fast: false # max-parallel: 1 matrix: - os: [ubuntu-20.04, macOS-11, windows-2019] + os: [ubuntu-20.04, macOS-11, windows-2022] pkg: ["", "lightning"] python-version: [3.8] # , 3.9 @@ -100,7 +100,7 @@ jobs: fail-fast: false # max-parallel: 1 matrix: - os: [ubuntu-20.04, macOS-11, windows-2019] + os: [ubuntu-20.04, macOS-11, windows-2022] python-version: [3.8] # , 3.9 steps: diff --git a/.github/workflows/ci_pr-gatekeeper.yml b/.github/workflows/ci-pr-gatekeeper.yml similarity index 100% rename from .github/workflows/ci_pr-gatekeeper.yml rename to .github/workflows/ci-pr-gatekeeper.yml diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/ci-pytorch-dockers.yml similarity index 81% rename from .github/workflows/cicd-pytorch_dockers.yml rename to .github/workflows/ci-pytorch-dockers.yml index a6ba2ac4aa5f4..a05dbbb5bc8ef 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/ci-pytorch-dockers.yml @@ -8,8 +8,9 @@ on: paths: - "dockers/**" - "!dockers/README.md" - - "requirements/**" - "requirements.txt" + - "requirements/*.txt" + - "requirements/pytorch/*" - "environment.yml" - ".github/workflows/*docker*.yml" - "setup.py" @@ -29,17 +30,22 @@ jobs: strategy: fail-fast: false matrix: - # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image - python_version: ["3.9"] - pytorch_version: ["1.12"] + include: + # We only release one docker image per PyTorch version. + # The matrix here is the same as the one in release-docker.yml. 
+ - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/release/Dockerfile push: false # pushed in release-docker.yml only when PL is released timeout-minutes: 50 @@ -53,14 +59,14 @@ jobs: python_version: ["3.7"] xla_version: ["1.12"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -85,30 +91,31 @@ jobs: fail-fast: false matrix: include: - # the config used in '.azure-pipelines/gpu-tests.yml' - - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1", ubuntu_version: "20.04"} - # latest (used in Tutorials) - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} + # These are the base images for PL release docker images, + # so include at least all of the combinations in release-dockers.yml. 
+ - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} + # Used in Lightning-AI/tutorials + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch_version }} CUDA_VERSION=${{ matrix.cuda_version }} - UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} + tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 if: failure() && env.PUSH_TO_HUB == 'true' @@ -126,25 +133,23 @@ jobs: fail-fast: false matrix: include: - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1.1"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - # nightly: add when there's a release candidate - # - {python_version: "3.9", pytorch_version: "1.12"} + - {python_version: "3.8", pytorch_version: "1.9"} + - {python_version: "3.8", pytorch_version: "1.10"} + - {python_version: "3.9", pytorch_version: "1.11"} + - {python_version: "3.9", pytorch_version: "1.12"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} @@ -168,14 +173,14 @@ jobs: # the config used in 'dockers/ci-runner-ipu/Dockerfile' - {python_version: "3.9", pytorch_version: "1.9"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -184,7 +189,7 @@ jobs: push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} timeout-minutes: 100 - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 
with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -199,7 +204,7 @@ jobs: status: ${{ job.status }} token: ${{ secrets.GITHUB_TOKEN }} notification_title: ${{ format('IPU; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01BULUS2BG>' # SeanNaren + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01GD29QCAV>' # kaushikb11 env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} @@ -212,14 +217,14 @@ jobs: # the config used in 'dockers/ci-runner-hpu/Dockerfile' - {gaudi_version: "1.5.0", pytorch_version: "1.11.0"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | DIST=latest @@ -243,10 +248,10 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build Conda Docker # publish master/release - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: file: dockers/nvidia/Dockerfile push: false diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch-test-conda.yml similarity index 97% rename from .github/workflows/ci-pytorch_test-conda.yml rename to .github/workflows/ci-pytorch-test-conda.yml index 777ec2af759a0..3498f087ef0aa 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch-test-conda.yml @@ -22,13 +22,11 @@ jobs: strategy: fail-fast: false matrix: - # nightly: add when there's a release candidate include: - {python-version: "3.8", pytorch-version: "1.9"} - {python-version: "3.8", pytorch-version: "1.10"} - {python-version: "3.9", pytorch-version: "1.11"} - {python-version: "3.9", pytorch-version: "1.12"} - timeout-minutes: 30 steps: @@ -39,7 +37,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v24 + uses: tj-actions/changed-files@v23.1 - name: Decide if the test should be skipped id: skip diff --git a/.github/workflows/ci-pytorch_test-full.yml b/.github/workflows/ci-pytorch-test-full.yml similarity index 97% rename from .github/workflows/ci-pytorch_test-full.yml rename to .github/workflows/ci-pytorch-test-full.yml index 445707d340c4b..173e2a44a61f4 100644 --- a/.github/workflows/ci-pytorch_test-full.yml +++ b/.github/workflows/ci-pytorch-test-full.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, windows-2019, macOS-11] + os: [ubuntu-20.04, windows-2022, macOS-11] python-version: ["3.7", "3.10"] # minimum, maximum requires: ["oldest", "latest"] release: ["stable"] @@ -39,13 +39,13 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v24 + uses: tj-actions/changed-files@v23.1 - name: Decide if the test should be skipped id: skip shell: bash -l {0} run: | - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.github/workflows/ci-pytorch_test-full.yml' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*' echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES @@ 
-59,7 +59,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} if: ${{ (steps.skip.outputs.continue == '1') }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/ci-pytorch_test-slow.yml b/.github/workflows/ci-pytorch-test-slow.yml similarity index 95% rename from .github/workflows/ci-pytorch_test-slow.yml rename to .github/workflows/ci-pytorch-test-slow.yml index b3756bbe8c2f7..0bb9916ee302a 100644 --- a/.github/workflows/ci-pytorch_test-slow.yml +++ b/.github/workflows/ci-pytorch-test-slow.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, windows-2019, macOS-11] + os: [ubuntu-20.04, windows-2022, macOS-11] # same config as '.azure-pipelines/gpu-tests.yml' python-version: ["3.7"] pytorch-version: ["1.11"] @@ -30,13 +30,13 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v24 + uses: tj-actions/changed-files@v23.1 - name: Decide if the test should be skipped id: skip shell: bash -l {0} run: | - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.github/workflows/ci-pytorch_test-slow.yml' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*' echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES @@ -48,7 +48,7 @@ jobs: echo "::set-output name=continue::1" fi - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v2 if: ${{ (steps.skip.outputs.continue == '1') }} with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/ci_schema.yml b/.github/workflows/ci-schema.yml similarity index 100% rename from .github/workflows/ci_schema.yml rename to .github/workflows/ci-schema.yml diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index 977118b644ef3..5b5a9aec778be 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -42,13 +42,13 @@ jobs: - name: Install dependencies env: FREEZE_REQUIREMENTS: 1 + PACKAGE_NAME: ${{ matrix.pkg }} run: | sudo apt-get update sudo apt-get install -y cmake pandoc pip --version - pip install -q fire # python -m pip install --upgrade --user pip - pip install -e . --quiet -r requirements/${{ matrix.pkg }}/base.txt -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html + pip install -e . --quiet -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html pip install -r requirements/${{ matrix.pkg }}/devel.txt pip list shell: bash @@ -91,11 +91,12 @@ jobs: - name: Install dependencies env: FREEZE_REQUIREMENTS: 1 + PACKAGE_NAME: ${{ matrix.pkg }} run: | sudo apt-get update sudo apt-get install -y cmake pandoc pip --version - pip install -e . --quiet -r requirements/${{ matrix.pkg }}/base.txt -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html + pip install -e . 
--quiet -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html # install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux sudo apt-get update && sudo apt-get install -y texlive-latex-extra dvipng texlive-pictures pip list diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 9d87f1a582fb1..6901a24204683 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -1,6 +1,5 @@ name: Docker -# https://www.docker.com/blog/first-docker-github-action-is-here -# https://github.com/docker/build-push-action + on: push: branches: [master, "release/*"] @@ -15,8 +14,12 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.7", "3.8", "3.9"] - pytorch_version: ["1.9", "1.10"] + include: + # We only release one docker image per PyTorch version. + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - name: Checkout uses: actions/checkout@v2 @@ -32,19 +35,29 @@ jobs: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} dockerfile: dockers/release/Dockerfile - build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} - tags: "${{ steps.get_version.outputs.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}" + build_args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} + LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} + tags: | + ${{ steps.get_version.outputs.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} + latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 55 - name: Publish Latest to Docker uses: docker/build-push-action@v1.1.0 - # only on releases and latest Python and PyTorch - if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.10' + # Only latest Python and PyTorch + if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.12' with: repository: pytorchlightning/pytorch_lightning username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} dockerfile: dockers/release/Dockerfile - build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} + build_args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} + LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} tags: "latest" timeout-minutes: 55 diff --git a/.gitignore b/.gitignore index 719f291a492ca..259d9f271189c 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,9 @@ hars* artifacts/* *docs/examples* *docs/source-app/api* + +# tutorials +our_model.tar +test.png +saved_models +data/ diff --git a/README.md b/README.md index 2fef343425f17..9c03e3707ec24 100644 --- 
a/README.md +++ b/README.md @@ -80,21 +80,24 @@ ______________________________________________________________________ ## Continuous Integration -Lightning is rigorously tested across multiple GPUs, TPUs CPUs and against major Python and PyTorch versions. +Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs and against major Python and PyTorch versions.
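A recurring change in the workflow hunks above is that the CUDA version is now part of the docker image names: .azure/gpu-tests.yml, .azure/gpu-benchmark.yml, the base-cuda build job, and release-docker.yml all move from `...-torch<version>` tags to `...-torch<version>-cuda<version>` tags. As a small, purely illustrative sketch (not part of any workflow file), the snippet below composes the new tag names from the release matrix shown in release-docker.yml; the matrix values are copied from the diff.

```python
# Matrix copied from the updated release-docker.yml: one image per PyTorch version.
matrix = [
    {"python_version": "3.9", "pytorch_version": "1.9", "cuda_version": "11.1.1"},
    {"python_version": "3.9", "pytorch_version": "1.10", "cuda_version": "11.3.1"},
    {"python_version": "3.9", "pytorch_version": "1.11", "cuda_version": "11.3.1"},
    {"python_version": "3.9", "pytorch_version": "1.12", "cuda_version": "11.3.1"},
]

for entry in matrix:
    # Tag layout taken from the base-cuda build job's `tags:` line.
    tag = (
        "pytorchlightning/pytorch_lightning:base-cuda"
        f"-py{entry['python_version']}"
        f"-torch{entry['pytorch_version']}"
        f"-cuda{entry['cuda_version']}"
    )
    print(tag)
```

The updated Azure jobs and the release Dockerfile reference the same `-cuda<version>` suffix, which is why the `CUDA_VERSION` build argument is now threaded through the docker workflows.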
Current build statuses
-| System / PyTorch ver. | 1.8 (LTS, min. req.) | 1.9 | 1.10 (latest) | -| :------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Linux py3.7 \[GPUs\*\*\] | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=6&branchName=master) | - | - | -| Linux py3.7 \[TPUs\*\*\*\] | [![CircleCI](https://circleci.com/gh/Lightning-AI/lightning/tree/master.svg?style=svg)](https://circleci.com/gh/Lightning-AI/lightning/tree/master) | - | - | -| Linux py3.8 (with Conda | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml) | -| Linux py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml) | -| OSX py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml) | -| Windows py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml) | +| System / PyTorch ver. 
| 1.9 | 1.10 | 1.12 (latest) | +| :------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Linux py3.7 \[GPUs\*\*\] | - | - | - | +| Linux py3.7 \[TPUs\*\*\*\] | [![CircleCI](https://circleci.com/gh/Lightning-AI/lightning/tree/master.svg?style=svg)](https://circleci.com/gh/Lightning-AI/lightning/tree/master) | - | - | +| Linux py3.8 \[IPUs\] | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=master) | - | - | +| Linux py3.8 \[HPUs\] | - | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | - | +| Linux py3.8 (with Conda) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) | - | +| Linux py3.9 (with Conda) | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) | +| Linux py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) | +| OSX py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) | +| Windows py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) | - _\*\* tests run on two NVIDIA P100_ - _\*\*\* tests run on Google GKE TPUv2/3. 
TPU py3.7 means we support Colab and Kaggle env._ @@ -136,8 +139,8 @@ conda install pytorch-lightning -c conda-forge The actual status of 1.7 \[stable\] is the following: -[![Test PyTorch full](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-full.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-full.yml?query=branch%3Arelease%2Fpytorch) -[![Test PyTorch with Conda](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-conda.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-conda.yml?query=branch%3Arelease%2Fpytorch) +[![Test PyTorch full](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml?query=branch%3Arelease%2Fpytorch) +[![Test PyTorch with Conda](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml?query=branch%3Arelease%2Fpytorch) [![TPU tests](https://dl.circleci.com/status-badge/img/gh/Lightning-AI/lightning/tree/release%2Fpytorch.svg?style=shield)](https://dl.circleci.com/status-badge/redirect/gh/Lightning-AI/lightning/tree/release%2Fpytorch) [![Check Docs](https://github.com/Lightning-AI/lightning/actions/workflows/docs-checks.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/docs-checks.yml?query=branch%3Arelease%2Fpytorch) diff --git a/dockers/README.md b/dockers/README.md index 533c85739f528..b1ff9826b6c1f 100644 --- a/dockers/README.md +++ b/dockers/README.md @@ -1,36 +1,17 @@ # Docker images -## Builds images form attached Dockerfiles +## Build images from Dockerfiles You can build it on your own, note it takes lots of time, be prepared. ```bash -git clone -docker image build -t pytorch-lightning:latest -f dockers/conda/Dockerfile . -``` - -or with specific arguments - -```bash -git clone -docker image build \ - -t pytorch-lightning:base-cuda-py3.9-pt1.10 \ - -f dockers/base-cuda/Dockerfile \ - --build-arg PYTHON_VERSION=3.9 \ - --build-arg PYTORCH_VERSION=1.10 \ - . -``` +git clone https://github.com/Lightning-AI/lightning.git -or nightly version from Conda +# build with the default arguments +docker image build -t pytorch-lightning:latest -f dockers/base-cuda/Dockerfile . -```bash -git clone -docker image build \ - -t pytorch-lightning:base-conda-py3.9-pt1.11 \ - -f dockers/base-conda/Dockerfile \ - --build-arg PYTHON_VERSION=3.9 \ - --build-arg PYTORCH_VERSION=1.11 \ - . +# build with specific arguments +docker image build -t pytorch-lightning:base-cuda-py3.9-torch1.11-cuda11.3.1 -f dockers/base-cuda/Dockerfile --build-arg PYTHON_VERSION=3.9 --build-arg PYTORCH_VERSION=1.11 --build-arg CUDA_VERSION=11.3.1 . 
``` To run your docker use @@ -49,7 +30,7 @@ docker image rm pytorch-lightning:latest ## Run docker image with GPUs -To run docker image with access to you GPUs you need to install +To run docker image with access to your GPUs, you need to install ```bash # Add the package repositories @@ -61,10 +42,10 @@ sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit sudo systemctl restart docker ``` -and later run the docker image with `--gpus all` so for example +and later run the docker image with `--gpus all`. For example, ``` -docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.10 +docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11-cuda11.3.1 ``` ## Run Jupyter server @@ -73,15 +54,11 @@ Inspiration comes from https://u.group/thinking/how-to-put-jupyter-notebooks-in- 1. Build the docker image: ```bash - docker image build \ - -t pytorch-lightning:v1.3.1 \ - -f dockers/nvidia/Dockerfile \ - --build-arg LIGHTNING_VERSION=1.3.1 \ - . + docker image build -t pytorch-lightning:v1.6.5 -f dockers/nvidia/Dockerfile --build-arg LIGHTNING_VERSION=1.6.5 . ``` 1. start the server and map ports: ```bash - docker run --rm -it --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -p 8888:8888 pytorch-lightning:v1.3.1 + docker run --rm -it --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -p 8888:8888 pytorch-lightning:v1.6.5 ``` 1. Connect in local browser: - copy the generated path e.g. `http://hostname:8888/?token=0719fa7e1729778b0cec363541a608d5003e26d4910983c6` diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index cb393c91dfbe0..c39e66509188c 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -14,8 +14,9 @@ ARG PYTHON_VERSION=3.9 ARG PYTORCH_VERSION=1.11 +ARG CUDA_VERSION=11.3.1 -FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} +FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}-cuda${CUDA_VERSION} LABEL maintainer="Lightning-AI " diff --git a/docs/source-pytorch/api_references.rst b/docs/source-pytorch/api_references.rst index db4fc1e2c4cf8..ce7723e418e77 100644 --- a/docs/source-pytorch/api_references.rst +++ b/docs/source-pytorch/api_references.rst @@ -173,6 +173,7 @@ precision DeepSpeedPrecisionPlugin DoublePrecisionPlugin FullyShardedNativeMixedPrecisionPlugin + FullyShardedNativeNativeMixedPrecisionPlugin HPUPrecisionPlugin IPUPrecisionPlugin MixedPrecisionPlugin diff --git a/docs/source-pytorch/extensions/plugins.rst b/docs/source-pytorch/extensions/plugins.rst index a0dbefd141464..27aff0c11fdcb 100644 --- a/docs/source-pytorch/extensions/plugins.rst +++ b/docs/source-pytorch/extensions/plugins.rst @@ -56,6 +56,7 @@ The full list of built-in precision plugins is listed below. 
DeepSpeedPrecisionPlugin DoublePrecisionPlugin FullyShardedNativeMixedPrecisionPlugin + FullyShardedNativeNativeMixedPrecisionPlugin HPUPrecisionPlugin IPUPrecisionPlugin MixedPrecisionPlugin diff --git a/requirements/app/docs.txt b/requirements/app/docs.txt index 63ac1f289331f..c189d6034ab28 100644 --- a/requirements/app/docs.txt +++ b/requirements/app/docs.txt @@ -1,17 +1,8 @@ -sphinx>=4.0,<5.0 -myst-parser>=0.15,<0.17 -nbsphinx>=0.8.5, <=0.8.9 +-r ../docs.txt + ipython[notebook] ipython_genutils -pandoc>=1.0, <=2.2 -docutils>=0.16, <0.19 -sphinxcontrib-fulltoc>=1.0, <=1.2.0 -sphinxcontrib-mockautodoc +pytorch-lightning -https://storage.googleapis.com/grid-packages/lightning-ai-sphinx-theme/build-31-rc1.zip -sphinx-autodoc-typehints>=1.0,<1.15 # v1.15 failing on master (#11405) -sphinx-paramlinks>=0.5.1, <=0.5.4 -sphinx-togglebutton>=0.2, <=0.3.2 -sphinx-copybutton>=0.3, <=0.5.0 sphinx-autobuild -jinja2>=3.0.0,<3.1.0 +https://storage.googleapis.com/grid-packages/lightning-ai-sphinx-theme/build-31.3.zip diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000000000..1b00471602c60 --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,13 @@ +sphinx>=4.0, <5.0 +myst-parser>=0.15, <0.17 +nbsphinx>=0.8.5, <=0.8.9 +pandoc>=1.0, <=2.2 +docutils>=0.16, <0.19 +sphinxcontrib-fulltoc>=1.0, <=1.2.0 +sphinxcontrib-mockautodoc +sphinx-autodoc-typehints>=1.11, <1.15 # strict; v1.15 failing on master (#11405) +sphinx-paramlinks>=0.5.1, <=0.5.4 +sphinx-togglebutton>=0.2, <=0.3.2 +sphinx-copybutton>=0.3, <=0.5.0 +sphinx-multiproject +jinja2>=3.0.0,<3.1.0 diff --git a/requirements/pytorch/docs.txt b/requirements/pytorch/docs.txt index 50e7c2049f6f6..474620b1e74b8 100644 --- a/requirements/pytorch/docs.txt +++ b/requirements/pytorch/docs.txt @@ -1,17 +1,6 @@ -sphinx>=4.0,<5.0 -myst-parser>=0.15,<0.17 -nbsphinx>=0.8.5, <=0.8.9 +-r ../docs.txt + ipython[notebook] -pandoc>=1.0, <=2.2 -docutils>=0.16, <0.19 -sphinxcontrib-fulltoc>=1.0, <=1.2.0 -sphinxcontrib-mockautodoc pt-lightning-sphinx-theme @ https://github.com/Lightning-AI/lightning_sphinx_theme/archive/master.zip -sphinx-autodoc-typehints>=1.11,<1.15 # strict; v1.15 failing on master (#11405) -sphinx-paramlinks>=0.5.1, <=0.5.4 -sphinx-togglebutton>=0.2, <=0.3.2 -sphinx-copybutton>=0.3, <=0.5.0 -typing-extensions # already in `requirements.txt` but the docs CI job does not install it -jinja2>=3.0.0,<3.1.0 -r ../../_notebooks/.actions/requirements.txt diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index c386c5581cc42..20b6c1b8dbc12 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -7,5 +7,5 @@ torchtext>=0.10.*, <0.14.0 omegaconf>=2.0.5, <2.3.0 hydra-core>=1.0.5, <1.3.0 jsonargparse[signatures]>=4.12.0, <=4.12.0 -gcsfs>=2021.5.0, <2022.6.0 +gcsfs>=2021.5.0, <2022.8.0 rich>=10.14.0, !=10.15.0.a, <13.0.0 diff --git a/requirements/pytorch/strategies.txt b/requirements/pytorch/strategies.txt index 4e916fbc6c61f..c5fc92a67a837 100644 --- a/requirements/pytorch/strategies.txt +++ b/requirements/pytorch/strategies.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment fairscale>=0.4.5, <=0.4.6 -deepspeed>=0.6.0, <0.7.0 +deepspeed>=0.6.0, <=0.7.0 # no need to install with [pytorch] as pytorch is already installed horovod>=0.21.2, !=0.24.0, <0.25.1 hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux' diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 
c155400a3d35f..f8bd5793a0af6 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -10,7 +10,7 @@ mypy==0.971 # needed in tests cloudpickle>=1.3, <=2.1.0 scikit-learn>0.22.1, <=1.1.1 -onnxruntime<=1.12.0 +onnxruntime<1.13.0 psutil<=5.9.1 # for `DeviceStatsMonitor` pandas>1.0, <=1.4.3 # needed in benchmarks fastapi<=0.79.0 diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 835838342e610..92913fcdf760f 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -4,6 +4,36 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). +## [1.7.2] - 2022-08-16 + +### Added + +- Added `FullyShardedNativeNativeMixedPrecisionPlugin` to handle precision for `DDPFullyShardedNativeStrategy` ([#14092](https://github.com/Lightning-AI/lightning/pull/14092)) +- Added profiling to these hooks: `on_before_batch_transfer`, `transfer_batch_to_device`, `on_after_batch_transfer`, `configure_gradient_clipping`, `clip_gradients` ([#14069](https://github.com/Lightning-AI/lightning/pull/14069)) + +### Changed + +- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([#13967](https://github.com/Lightning-AI/lightning/pull/13967)) +- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([#13961](https://github.com/Lightning-AI/lightning/pull/13961)) +- The default project name in `WandbLogger` is now "lightning_logs" ([#14145](https://github.com/Lightning-AI/lightning/pull/14145)) +- The `WandbLogger.name` property no longer returns the name of the experiment, and instead returns the project's name ([#14145](https://github.com/Lightning-AI/lightning/pull/14145)) + +### Fixed + +- Fixed a bug that caused spurious `AttributeError` when multiple `DataLoader` classes are imported ([#14117](https://github.com/Lightning-AI/lightning/pull/14117)) +- Fixed epoch-end logging results not being reset after the end of the epoch ([#14061](https://github.com/Lightning-AI/lightning/pull/14061)) +- Fixed saving hyperparameters in a composition where the parent class is not a `LightningModule` or `LightningDataModule` ([#14151](https://github.com/Lightning-AI/lightning/pull/14151)) +- Fixed the device placement when `LightningModule.cuda()` gets called without specifying a device index and the current cuda device was not 0 ([#14128](https://github.com/Lightning-AI/lightning/pull/14128)) +- Avoided false positive warning about using `sync_dist` when using torchmetrics ([#14143](https://github.com/Lightning-AI/lightning/pull/14143)) +- Avoid `metadata.entry_points` deprecation warning on Python 3.10 ([#14052](https://github.com/Lightning-AI/lightning/pull/14052)) +- Avoid raising the sampler warning if num_replicas=1 ([#14097](https://github.com/Lightning-AI/lightning/pull/14097)) +- Fixed resuming from a checkpoint when using Stochastic Weight Averaging (SWA) ([#9938](https://github.com/Lightning-AI/lightning/pull/9938)) +- Avoided requiring the FairScale package to use precision with the fsdp native strategy ([#14092](https://github.com/Lightning-AI/lightning/pull/14092)) +- Fixed an issue in which the default name for a run in `WandbLogger` would be set to the project name
instead of a randomly generated string ([#14145](https://github.com/Lightning-AI/lightning/pull/14145)) +- Fixed not preserving set attributes on `DataLoader` and `BatchSampler` when instantiated inside `*_dataloader` hooks ([#14212](https://github.com/Lightning-AI/lightning/pull/14212)) + + ## [1.7.1] - 2022-08-09 ### Fixed diff --git a/src/pytorch_lightning/README.md b/src/pytorch_lightning/README.md index eb1a42730b5f0..b57aea6fae147 100644 --- a/src/pytorch_lightning/README.md +++ b/src/pytorch_lightning/README.md @@ -78,17 +78,17 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs
-| System / PyTorch ver. | 1.9 | 1.10 | 1.12 (latest) | -| :------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Linux py3.7 \[GPUs\*\*\] | - | - | - | -| Linux py3.7 \[TPUs\*\*\*\] | [![CircleCI](https://circleci.com/gh/Lightning-AI/lightning/tree/master.svg?style=svg)](https://circleci.com/gh/Lightning-AI/lightning/tree/master) | - | - | -| Linux py3.8 \[IPUs\] | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=6&branchName=master) | - | - | -| Linux py3.8 \[HPUs\] | - | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=6&branchName=master) | - | -| Linux py3.8 (with Conda) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml) | - | -| Linux py3.9 (with Conda) | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-conda.yml) | -| Linux py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml) | -| OSX py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml) | -| Windows py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci_test-full.yml) | +| System / PyTorch ver. 
| 1.9 | 1.10 | 1.12 (latest) | +| :------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Linux py3.7 \[GPUs\*\*\] | - | - | - | +| Linux py3.7 \[TPUs\*\*\*\] | [![CircleCI](https://circleci.com/gh/Lightning-AI/lightning/tree/master.svg?style=svg)](https://circleci.com/gh/Lightning-AI/lightning/tree/master) | - | - | +| Linux py3.8 \[IPUs\] | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=master) | - | - | +| Linux py3.8 \[HPUs\] | - | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | - | +| Linux py3.8 (with Conda) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) | - | +| Linux py3.9 (with Conda) | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) | +| Linux py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) | +| OSX py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) | +| Windows py3.{7,9} | - | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) | - _\*\* tests run on two NVIDIA P100_ - _\*\*\* tests run on Google GKE TPUv2/3. 
TPU py3.7 means we support Colab and Kaggle env._ @@ -130,8 +130,8 @@ conda install pytorch-lightning -c conda-forge The actual status of stable is the following: -[![Test PyTorch full](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-full.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-full.yml) -[![Test PyTorch with Conda](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-conda.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch_test-conda.yml) +[![Test PyTorch full](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-full.yml) +[![Test PyTorch with Conda](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml/badge.svg?branch=release%2Fpytorch&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-pytorch-test-conda.yml) [![GPU]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=release%2Fpytorch) [![TPU](https://dl.circleci.com/status-badge/img/gh/Lightning-AI/lightning/tree/release%2Fpytorch.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/Lightning-AI/lightning/tree/release%2Fpytorch) [![IPU]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=release%2Fpytorch) diff --git a/src/pytorch_lightning/__version__.py b/src/pytorch_lightning/__version__.py index 116d5667841f3..2196826f840ed 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -1 +1 @@ -version = "1.7.1" +version = "1.7.2" diff --git a/src/pytorch_lightning/callbacks/stochastic_weight_avg.py b/src/pytorch_lightning/callbacks/stochastic_weight_avg.py index 20a3dcc3f0f26..6650bb3f0c479 100644 --- a/src/pytorch_lightning/callbacks/stochastic_weight_avg.py +++ b/src/pytorch_lightning/callbacks/stochastic_weight_avg.py @@ -16,7 +16,7 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ """ from copy import deepcopy -from typing import Any, Callable, cast, List, Optional, Union +from typing import Any, Callable, cast, Dict, List, Optional, Union import torch from torch import nn, Tensor @@ -24,6 +24,7 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks.callback import Callback +from pytorch_lightning.strategies import DDPFullyShardedStrategy, DeepSpeedStrategy from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.rank_zero import rank_zero_info, rank_zero_warn from pytorch_lightning.utilities.types import _LRScheduler, LRSchedulerConfig @@ -112,15 +113,22 @@ def __init__( if device is not None and not isinstance(device, (torch.device, str)): raise MisconfigurationException(f"device is expected to be a torch.device or a str. 
Found {device}") + self.n_averaged: Optional[torch.Tensor] = None self._swa_epoch_start = swa_epoch_start self._swa_lrs = swa_lrs self._annealing_epochs = annealing_epochs self._annealing_strategy = annealing_strategy self._avg_fn = avg_fn or self.avg_fn self._device = device - self._max_epochs: int - self._model_contains_batch_norm: bool + self._model_contains_batch_norm: Optional[bool] = None self._average_model: "pl.LightningModule" + self._initialized = False + self._swa_scheduler: Optional[_LRScheduler] = None + self._scheduler_state: Optional[Dict] = None + self._init_n_averaged = 0 + self._latest_update_epoch = -1 + self.momenta: Optional[Dict[nn.modules.batchnorm._BatchNorm, float]] = None + self._max_epochs: int @property def swa_start(self) -> int: @@ -147,6 +155,9 @@ def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") - if len(trainer.lr_scheduler_configs) > 1: raise MisconfigurationException("SWA currently not supported for more than 1 `lr_scheduler`.") + if isinstance(trainer.strategy, (DDPFullyShardedStrategy, DeepSpeedStrategy)): + raise MisconfigurationException("SWA does not currently support sharded models.") + if isinstance(self._swa_epoch_start, float): self._swa_epoch_start = int(trainer.max_epochs * self._swa_epoch_start) @@ -158,8 +169,13 @@ def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") - assert trainer.fit_loop.max_epochs is not None trainer.fit_loop.max_epochs += 1 + if self._scheduler_state is not None: + self._clear_schedulers(trainer) + def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: - if trainer.current_epoch == self.swa_start: + if (not self._initialized) and (self.swa_start <= trainer.current_epoch <= self.swa_end): + self._initialized = True + # move average model to request device. self._average_model = self._average_model.to(self._device or pl_module.device) @@ -180,6 +196,17 @@ def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningMo last_epoch=trainer.max_epochs if self._annealing_strategy == "cos" else -1, ), ) + if self._scheduler_state is not None: + # Restore scheduler state from checkpoint + self._swa_scheduler.load_state_dict(self._scheduler_state) + elif trainer.current_epoch != self.swa_start: + # Log a warning if we're initializing after start without any checkpoint data, + # as behaviour will be different compared to having checkpoint data. + rank_zero_warn( + "SWA is initializing after swa_start without any checkpoint data. " + "This may be caused by loading a checkpoint from an older version of PyTorch Lightning." 
+ ) + # We assert that there is only one optimizer on fit start, so we know opt_idx is always 0 default_scheduler_cfg = LRSchedulerConfig(self._swa_scheduler, opt_idx=0) assert default_scheduler_cfg.interval == "epoch" and default_scheduler_cfg.frequency == 1 @@ -196,14 +223,18 @@ def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningMo else: trainer.lr_scheduler_configs.append(default_scheduler_cfg) - self.n_averaged = torch.tensor(0, dtype=torch.long, device=pl_module.device) + if self.n_averaged is None: + self.n_averaged = torch.tensor(self._init_n_averaged, dtype=torch.long, device=pl_module.device) - if self.swa_start <= trainer.current_epoch <= self.swa_end: + if (self.swa_start <= trainer.current_epoch <= self.swa_end) and ( + trainer.current_epoch > self._latest_update_epoch + ): + assert self.n_averaged is not None self.update_parameters(self._average_model, pl_module, self.n_averaged, self._avg_fn) + self._latest_update_epoch = trainer.current_epoch # Note: No > here in case the callback is saved with the model and training continues if trainer.current_epoch == self.swa_end + 1: - # Transfer weights from average model to pl_module self.transfer_weights(self._average_model, pl_module) @@ -265,6 +296,7 @@ def reset_batch_norm_and_save_state(self, pl_module: "pl.LightningModule") -> No def reset_momenta(self) -> None: """Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L164-L165.""" + assert self.momenta is not None for bn_module in self.momenta: bn_module.momentum = self.momenta[bn_module] @@ -285,3 +317,35 @@ def update_parameters( def avg_fn(averaged_model_parameter: Tensor, model_parameter: Tensor, num_averaged: Tensor) -> Tensor: """Adapted from https://github.com/pytorch/pytorch/blob/v1.7.1/torch/optim/swa_utils.py#L95-L97.""" return averaged_model_parameter + (model_parameter - averaged_model_parameter) / (num_averaged + 1) + + def state_dict(self) -> Dict[str, Any]: + return { + "n_averaged": 0 if self.n_averaged is None else self.n_averaged.item(), + "latest_update_epoch": self._latest_update_epoch, + "scheduler_state": None if self._swa_scheduler is None else self._swa_scheduler.state_dict(), + "average_model_state": None if self._average_model is None else self._average_model.state_dict(), + } + + def load_state_dict(self, state_dict: Dict[str, Any]) -> None: + self._init_n_averaged = state_dict["n_averaged"] + self._latest_update_epoch = state_dict["latest_update_epoch"] + self._scheduler_state = state_dict["scheduler_state"] + self._load_average_model_state(state_dict["average_model_state"]) + + @staticmethod + def _clear_schedulers(trainer: "pl.Trainer") -> None: + # If we have scheduler state saved, clear the scheduler configs so that we don't try to + # load state into the wrong type of schedulers when restoring scheduler checkpoint state. + # We'll configure the scheduler and re-load its state in on_train_epoch_start. + # Note that this relies on the callback state being restored before the scheduler state is + # restored, and doesn't work if restore_checkpoint_after_setup is True, but at the time of + # writing that is only True for deepspeed which is already not supported by SWA. + # See https://github.com/PyTorchLightning/pytorch-lightning/issues/11665 for background. 
+ if trainer.lr_scheduler_configs: + assert len(trainer.lr_scheduler_configs) == 1 + trainer.lr_scheduler_configs.clear() + + def _load_average_model_state(self, model_state: Any) -> None: + if self._average_model is None: + return + self._average_model.load_state_dict(model_state) diff --git a/src/pytorch_lightning/core/mixins/device_dtype_mixin.py b/src/pytorch_lightning/core/mixins/device_dtype_mixin.py index b12e1cf042a1f..98fd9c7074c28 100644 --- a/src/pytorch_lightning/core/mixins/device_dtype_mixin.py +++ b/src/pytorch_lightning/core/mixins/device_dtype_mixin.py @@ -118,14 +118,16 @@ def cuda(self, device: Optional[Union[torch.device, int]] = None) -> Self: # ty while being optimized. Arguments: - device: if specified, all parameters will be - copied to that device + device: If specified, all parameters will be copied to that device. If `None`, the current CUDA device + index will be used. Returns: Module: self """ - if device is None or isinstance(device, int): - device = torch.device("cuda", index=(device or 0)) + if device is None: + device = torch.device("cuda", torch.cuda.current_device()) + elif isinstance(device, int): + device = torch.device("cuda", index=device) self.__update_properties(device=device) return super().cuda(device=device) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index b8cc1d91cde18..30e3562067ba7 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -38,7 +38,6 @@ from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO from pytorch_lightning.loggers import Logger, LoggerCollection -from pytorch_lightning.trainer.connectors.data_connector import _DataHookSelector from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator from pytorch_lightning.utilities import _IS_WINDOWS, _TORCH_GREATER_EQUAL_1_10, GradClipAlgorithmType, warnings from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors @@ -293,16 +292,24 @@ def _apply_batch_transfer_handler( self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0 ) -> Any: device = device or self.device - datahook_selector = ( - _DataHookSelector(self, None) if self._trainer is None else self.trainer._data_connector._datahook_selector - ) - hook = datahook_selector.get_hook("on_before_batch_transfer") - batch = hook(batch, dataloader_idx) - hook = datahook_selector.get_hook("transfer_batch_to_device") - batch = hook(batch, device, dataloader_idx) - hook = datahook_selector.get_hook("on_after_batch_transfer") - batch = hook(batch, dataloader_idx) + def call_hook(hook_name, *args): + if self._trainer: + datahook_selector = self._trainer._data_connector._datahook_selector + obj = datahook_selector.get_instance(hook_name) + trainer_method = ( + self._trainer._call_lightning_module_hook + if isinstance(obj, self.__class__) + else self._trainer._call_lightning_datamodule_hook + ) + return trainer_method(hook_name, *args) + else: + hook = getattr(self, hook_name) + return hook(*args) + + batch = call_hook("on_before_batch_transfer", batch, dataloader_idx) + batch = call_hook("transfer_batch_to_device", batch, device, dataloader_idx) + batch = call_hook("on_after_batch_transfer", batch, dataloader_idx) return batch def print(self, *args, **kwargs) -> None: diff --git a/src/pytorch_lightning/lite/lite.py b/src/pytorch_lightning/lite/lite.py index 5125bf4486a9d..ca45a4011fcdd 100644 --- 
a/src/pytorch_lightning/lite/lite.py +++ b/src/pytorch_lightning/lite/lite.py @@ -35,12 +35,11 @@ from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors from pytorch_lightning.utilities.data import ( _auto_add_worker_init_fn, - _replace_init_method, + _replace_dunder_methods, _update_dataloader, has_iterable_dataset, ) from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _RequirementAvailable from pytorch_lightning.utilities.seed import seed_everything @@ -106,8 +105,6 @@ def __init__( self._precision_plugin = self._strategy.precision_plugin self._models_setup: int = 0 - self._check_deepspeed_support() - # wrap the run method so we can inject setup logic or spawn processes for the user setattr(self, "run", partial(self._run_impl, self.run)) @@ -406,9 +403,9 @@ def _run_impl(self, run_method: Callable, *args: Any, **kwargs: Any) -> Any: def _run_with_strategy_setup(self, run_method: Callable, *args: Any, **kwargs: Any) -> Any: self._strategy.setup_environment() - with self._strategy.model_sharded_context(), _replace_init_method(DataLoader, "dataset"), _replace_init_method( - BatchSampler - ): + with self._strategy.model_sharded_context(), _replace_dunder_methods( + DataLoader, "dataset" + ), _replace_dunder_methods(BatchSampler): return run_method(*args, **kwargs) def _move_model_to_device(self, model: nn.Module, optimizers: List[Optimizer]) -> nn.Module: @@ -459,18 +456,6 @@ def _check_strategy_support(self, strategy: Optional[Union[str, Strategy]]) -> N f" Choose one of {supported} or pass in a `Strategy` instance." ) - def _check_deepspeed_support(self) -> None: - if ( - isinstance(self._strategy, DeepSpeedStrategy) - and self._strategy.zero_stage_3 - and _RequirementAvailable("deepspeed>=0.6.5") - ): - # https://github.com/microsoft/DeepSpeed/issues/2139 - raise RuntimeError( - "DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`." - " Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available." - ) - @staticmethod def _supported_device_types() -> Sequence[_AcceleratorType]: return ( diff --git a/src/pytorch_lightning/loggers/wandb.py b/src/pytorch_lightning/loggers/wandb.py index 530fb58fabe5e..baf4bc9092774 100644 --- a/src/pytorch_lightning/loggers/wandb.py +++ b/src/pytorch_lightning/loggers/wandb.py @@ -260,7 +260,7 @@ def __init__( id: Optional[str] = None, anonymous: Optional[bool] = None, version: Optional[str] = None, - project: Optional[str] = None, + project: str = "lightning_logs", log_model: Union[str, bool] = False, experiment: Union[Run, RunDisabled, None] = None, prefix: str = "", @@ -297,7 +297,7 @@ def __init__( self._checkpoint_callback: Optional["ReferenceType[Checkpoint]"] = None # set wandb init arguments self._wandb_init: Dict[str, Any] = dict( - name=name or project, + name=name, project=project, id=version or id, dir=save_dir, @@ -306,6 +306,7 @@ def __init__( ) self._wandb_init.update(**kwargs) # extract parameters + self._project = self._wandb_init.get("project") self._save_dir = self._wandb_init.get("dir") self._name = self._wandb_init.get("name") self._id = self._wandb_init.get("id") @@ -450,13 +451,13 @@ def save_dir(self) -> Optional[str]: @property def name(self) -> Optional[str]: - """Gets the name of the experiment. + """The project name of this experiment. Returns: - The name of the experiment if the experiment exists else the name given to the constructor. 
+ The name of the project the current experiment belongs to. This name is not the same as `wandb.Run`'s + name. To access wandb's internal experiment name, use ``logger.experiment.name`` instead. """ - # don't create an experiment if we don't have one - return self._experiment.name if self._experiment else self._name + return self._project @property def version(self) -> Optional[str]: diff --git a/src/pytorch_lightning/plugins/__init__.py b/src/pytorch_lightning/plugins/__init__.py index afd10c88c951d..50d83ee708cbe 100644 --- a/src/pytorch_lightning/plugins/__init__.py +++ b/src/pytorch_lightning/plugins/__init__.py @@ -10,6 +10,7 @@ from pytorch_lightning.plugins.precision.apex_amp import ApexMixedPrecisionPlugin from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin +from pytorch_lightning.plugins.precision.fsdp_native_native_amp import FullyShardedNativeNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin @@ -63,6 +64,7 @@ "FullyShardedNativeMixedPrecisionPlugin", "SingleDevicePlugin", "SingleTPUPlugin", + "FullyShardedNativeNativeMixedPrecisionPlugin", "TPUPrecisionPlugin", "TPUBf16PrecisionPlugin", "TPUSpawnPlugin", diff --git a/src/pytorch_lightning/plugins/precision/__init__.py b/src/pytorch_lightning/plugins/precision/__init__.py index 4bc29c1be1864..5206aed62c497 100644 --- a/src/pytorch_lightning/plugins/precision/__init__.py +++ b/src/pytorch_lightning/plugins/precision/__init__.py @@ -11,17 +11,32 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
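A rough usage sketch of the `WandbLogger.name` change above; the logger construction below is illustrative only and is not part of this diff:

    from pytorch_lightning.loggers import WandbLogger

    logger = WandbLogger(project="my-project", name="my-run")
    # With this change, `name` reflects the project rather than the run:
    assert logger.name == "my-project"
    # The run's own (wandb-internal) name is still reachable via the experiment
    # object, which initializes a wandb run on first access:
    run_name = logger.experiment.name
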
-from pytorch_lightning.plugins.precision.apex_amp import ApexMixedPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.fully_sharded_native_amp import ( # noqa: F401 - FullyShardedNativeMixedPrecisionPlugin, -) -from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.mixed import MixedPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin # noqa: F401 -from pytorch_lightning.plugins.precision.tpu_bf16 import TPUBf16PrecisionPlugin # noqa: F401 +from pytorch_lightning.plugins.precision.apex_amp import ApexMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.deepspeed import DeepSpeedPrecisionPlugin +from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin +from pytorch_lightning.plugins.precision.fsdp_native_native_amp import FullyShardedNativeNativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin +from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin +from pytorch_lightning.plugins.precision.mixed import MixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin +from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin +from pytorch_lightning.plugins.precision.tpu_bf16 import TPUBf16PrecisionPlugin + +__all__ = [ + "ApexMixedPrecisionPlugin", + "DeepSpeedPrecisionPlugin", + "DoublePrecisionPlugin", + "FullyShardedNativeNativeMixedPrecisionPlugin", + "FullyShardedNativeMixedPrecisionPlugin", + "HPUPrecisionPlugin", + "IPUPrecisionPlugin", + "MixedPrecisionPlugin", + "NativeMixedPrecisionPlugin", + "PrecisionPlugin", + "ShardedNativeMixedPrecisionPlugin", + "TPUPrecisionPlugin", + "TPUBf16PrecisionPlugin", +] diff --git a/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py b/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py new file mode 100644 index 0000000000000..38ec381fe5485 --- /dev/null +++ b/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py @@ -0,0 +1,65 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional, Union + +import torch + +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.utilities.enums import PrecisionType +from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_12 + +if _TORCH_GREATER_EQUAL_1_12: + from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision + from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler +else: + MixedPrecision = None + + +class FullyShardedNativeNativeMixedPrecisionPlugin(NativeMixedPrecisionPlugin): + """Native AMP for Fully Sharded Native Training.""" + + def __init__( + self, precision: Union[str, int], device: str, scaler: Optional[torch.cuda.amp.GradScaler] = None + ) -> None: + if not _TORCH_GREATER_EQUAL_1_12: + raise MisconfigurationException( + "`FullyShardedNativeNativeMixedPrecisionPlugin` is supported from PyTorch v1.12.0 onwards." + ) + super().__init__(precision, device, scaler=ShardedGradScaler() if scaler is None and precision == 16 else None) + + def clip_grad_by_norm(self, *_: Any, **__: Any) -> None: + # see https://pytorch.org/docs/stable/fsdp.html#torch.distributed.fsdp.FullyShardedDataParallel.clip_grad_norm_ + # section `Gradient Clipping`, using `torch.nn.utils.clip_grad_norm_` is incorrect + # for FSDP module. To overcome this, needs to call sharded_module.clip_grad_norm(clip_val) + # however we rely on LightningModule's configure_sharded_model to wrap FSDP, it would be hard to + # trace back the root FSDP. Now we only support clip by value. + raise MisconfigurationException( + f"`gradient_clip_algorithm='norm'` is currently not supported for `{self.__class__.__name__}`" + ) + + @property + def mixed_precision_config(self) -> Optional[MixedPrecision]: + assert MixedPrecision is not None + if self.precision == PrecisionType.HALF: + dtype = torch.float16 + elif self.precision == PrecisionType.BFLOAT: + dtype = torch.bfloat16 + else: + raise MisconfigurationException(f"Was unable to infer precision type, received {self.precision!r}.") + return MixedPrecision( + param_dtype=dtype, + reduce_dtype=dtype, + buffer_dtype=dtype, + ) diff --git a/src/pytorch_lightning/plugins/precision/fully_sharded_native_amp.py b/src/pytorch_lightning/plugins/precision/fully_sharded_native_amp.py index 8c693f2975bbd..870e658bfc9c3 100644 --- a/src/pytorch_lightning/plugins/precision/fully_sharded_native_amp.py +++ b/src/pytorch_lightning/plugins/precision/fully_sharded_native_amp.py @@ -11,19 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
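A minimal sketch of how the new plugin introduced above is expected to be selected, assuming the `fsdp_native` strategy alias, a multi-GPU machine, and PyTorch 1.12+; this snippet is illustrative and not part of the diff:

    import pytorch_lightning as pl

    trainer = pl.Trainer(accelerator="gpu", devices=2, strategy="fsdp_native", precision=16)
    # With the accelerator-connector change further below, `trainer.precision_plugin`
    # should be a `FullyShardedNativeNativeMixedPrecisionPlugin`, which also supplies
    # the `MixedPrecision` config consumed by the native FSDP strategy.
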
-from typing import Any, Optional - -import torch +from typing import Any from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin -from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_12 - -if _TORCH_GREATER_EQUAL_1_12: - from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision -else: - MixedPrecision = None class FullyShardedNativeMixedPrecisionPlugin(ShardedNativeMixedPrecisionPlugin): @@ -38,18 +29,3 @@ def clip_grad_by_norm(self, *_: Any, **__: Any) -> None: raise MisconfigurationException( f"`gradient_clip_algorithm='norm'` is currently not supported for `{self.__class__.__name__}`" ) - - @property - def mixed_precision_config(self) -> Optional[MixedPrecision]: - assert MixedPrecision is not None - if self.precision == PrecisionType.HALF: - dtype = torch.float16 - elif self.precision == PrecisionType.BFLOAT: - dtype = torch.bfloat16 - else: - raise MisconfigurationException(f"Was unable to infer precision type, received {self.precision!r}.") - return MixedPrecision( - param_dtype=dtype, - reduce_dtype=dtype, - buffer_dtype=dtype, - ) diff --git a/src/pytorch_lightning/plugins/precision/precision_plugin.py b/src/pytorch_lightning/plugins/precision/precision_plugin.py index 02d343a0876b4..b529568d1a04e 100644 --- a/src/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/src/pytorch_lightning/plugins/precision/precision_plugin.py @@ -178,7 +178,9 @@ def _clip_gradients( if not isinstance(model, pl.LightningModule) or not model.automatic_optimization: # the configuration validator disallows clipping on manual return - model.configure_gradient_clipping( + + model.trainer._call_lightning_module_hook( + "configure_gradient_clipping", optimizer, optimizer_idx, gradient_clip_val=clip_val, diff --git a/src/pytorch_lightning/strategies/fully_sharded_native.py b/src/pytorch_lightning/strategies/fully_sharded_native.py index 4c351f26fa3b9..9b927aa757d17 100644 --- a/src/pytorch_lightning/strategies/fully_sharded_native.py +++ b/src/pytorch_lightning/strategies/fully_sharded_native.py @@ -23,7 +23,7 @@ from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.precision import PrecisionPlugin -from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.fsdp_native_native_amp import FullyShardedNativeNativeMixedPrecisionPlugin from pytorch_lightning.strategies.launchers.subprocess_script import _SubprocessScriptLauncher from pytorch_lightning.strategies.parallel import ParallelStrategy from pytorch_lightning.strategies.strategy import TBroadcast @@ -158,7 +158,7 @@ def mixed_precision_config(self) -> Optional[MixedPrecision]: if self.mixed_precision: return self.mixed_precision plugin = self.precision_plugin - if isinstance(plugin, FullyShardedNativeMixedPrecisionPlugin): + if isinstance(plugin, FullyShardedNativeNativeMixedPrecisionPlugin): return plugin.mixed_precision_config @property diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 3c630403dafce..7dec5ba4bffe0 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -30,7 +30,7 @@ from 
pytorch_lightning.utilities import _IPU_AVAILABLE, _POPTORCH_AVAILABLE, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.cloud_io import get_filesystem -from pytorch_lightning.utilities.data import _get_dataloader_init_args_and_kwargs +from pytorch_lightning.utilities.data import _get_dataloader_init_args_and_kwargs, _reinstantiate_wrapped_cls from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.model_helpers import is_overridden @@ -239,7 +239,9 @@ def _convert_to_poptorch_loader( dataloader, sampler, mode, self.replication_factor > 1 ) opts = self.training_opts if mode == RunningStage.TRAINING else self.inference_opts - dataloader = poptorch.DataLoader(opts, *dl_args, **dl_kwargs) + dataloader = _reinstantiate_wrapped_cls( + dataloader, opts, *dl_args, explicit_cls=poptorch.DataLoader, **dl_kwargs + ) return dataloader def _handle_gradient_accumulation_steps(self) -> None: diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index bd879cf85ff7a..44c3b3ec7540a 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -53,6 +53,7 @@ TorchElasticEnvironment, ) from pytorch_lightning.plugins.layer_sync import LayerSync, NativeSyncBatchNorm +from pytorch_lightning.plugins.precision.fsdp_native_native_amp import FullyShardedNativeNativeMixedPrecisionPlugin from pytorch_lightning.strategies import ( DDP2Strategy, DDPFullyShardedNativeStrategy, @@ -727,7 +728,9 @@ def _check_and_init_precision(self) -> PrecisionPlugin: if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): return ShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - if isinstance(self.strategy, (DDPFullyShardedStrategy, DDPFullyShardedNativeStrategy)): + if isinstance(self.strategy, DDPFullyShardedNativeStrategy): + return FullyShardedNativeNativeMixedPrecisionPlugin(self._precision_flag, device) + if isinstance(self.strategy, DDPFullyShardedStrategy): return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) return NativeMixedPrecisionPlugin(self._precision_flag, device) diff --git a/src/pytorch_lightning/trainer/connectors/callback_connector.py b/src/pytorch_lightning/trainer/connectors/callback_connector.py index 83881905beeb1..3c76e734db189 100644 --- a/src/pytorch_lightning/trainer/connectors/callback_connector.py +++ b/src/pytorch_lightning/trainer/connectors/callback_connector.py @@ -30,7 +30,7 @@ from pytorch_lightning.callbacks.rich_model_summary import RichModelSummary from pytorch_lightning.callbacks.timer import Timer from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _PYTHON_GREATER_EQUAL_3_8_0 +from pytorch_lightning.utilities.imports import _PYTHON_GREATER_EQUAL_3_8_0, _PYTHON_GREATER_EQUAL_3_10_0 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info _log = logging.getLogger(__name__) @@ -256,14 +256,19 @@ def _configure_external_callbacks() -> List[Callback]: Return: A list of all callbacks collected from external factories. 
""" + group = "pytorch_lightning.callbacks_factory" + if _PYTHON_GREATER_EQUAL_3_8_0: from importlib.metadata import entry_points - factories = entry_points().get("pytorch_lightning.callbacks_factory", ()) + if _PYTHON_GREATER_EQUAL_3_10_0: + factories = entry_points(group=group) # type: ignore[call-arg] + else: + factories = entry_points().get(group, {}) # type: ignore[assignment] else: from pkg_resources import iter_entry_points - factories = iter_entry_points("pytorch_lightning.callbacks_factory") + factories = iter_entry_points(group) # type: ignore[assignment] external_callbacks = [] for factory in factories: diff --git a/src/pytorch_lightning/trainer/connectors/data_connector.py b/src/pytorch_lightning/trainer/connectors/data_connector.py index e1aca404722db..e20eac2ffae57 100644 --- a/src/pytorch_lightning/trainer/connectors/data_connector.py +++ b/src/pytorch_lightning/trainer/connectors/data_connector.py @@ -14,7 +14,7 @@ import multiprocessing import os from dataclasses import dataclass, field -from typing import Any, Callable, Collection, List, Optional, Tuple, Union +from typing import Any, Collection, List, Optional, Tuple, Union from weakref import proxy from torch.utils.data import BatchSampler, DataLoader, Sampler, SequentialSampler @@ -31,7 +31,7 @@ from pytorch_lightning.utilities.data import ( _auto_add_worker_init_fn, _is_dataloader_shuffled, - _replace_init_method, + _replace_dunder_methods, _update_dataloader, has_iterable_dataset, has_len_all_ranks, @@ -298,10 +298,14 @@ def _resolve_sampler(self, dataloader: DataLoader, shuffle: bool, mode: Optional # update docs too once this is resolved trainer_fn = self.trainer.state.fn - if isinstance(sampler, DistributedSampler) and trainer_fn in (TrainerFn.VALIDATING, TrainerFn.TESTING): + if ( + isinstance(sampler, DistributedSampler) + and sampler.num_replicas > 1 + and trainer_fn in (TrainerFn.VALIDATING, TrainerFn.TESTING) + ): rank_zero_warn( - f"Using `DistributedSampler` with the dataloaders. During `trainer.{trainer_fn.value}()`," - " it is recommended to use `Trainer(devices=1)` to ensure each sample/batch gets evaluated" + f"Using `DistributedSampler` with the dataloaders. During `trainer.{trainer_fn.value}()`, it is" + " recommended to use `Trainer(devices=1, num_nodes=1)` to ensure each sample/batch gets evaluated" " exactly once. Otherwise, multi-device settings use `DistributedSampler` that replicates" " some samples to make sure all devices have same batch size in case of uneven inputs.", category=PossibleUserWarning, @@ -424,9 +428,11 @@ def _request_dataloader(self, stage: RunningStage) -> Union[DataLoader, List[Dat """ source = getattr(self, f"_{stage.dataloader_prefix}_dataloader_source") - with _replace_init_method(DataLoader, "dataset"), _replace_init_method(BatchSampler): + with _replace_dunder_methods(DataLoader, "dataset"), _replace_dunder_methods(BatchSampler): # under this context manager, the arguments passed to `DataLoader.__init__` will be captured and saved as - # attributes on the instance in case the dataloader needs to be re-instantiated later by Lightning + # attributes on the instance in case the dataloader needs to be re-instantiated later by Lightning. + # Also, it records all attribute setting and deletion using patched `__setattr__` and `__delattr__` + # methods so that the re-instantiated object is as close to the original as possible. 
dataloader = source.dataloader() if isinstance(dataloader, tuple): dataloader = list(dataloader) @@ -527,16 +533,16 @@ def is_module(self) -> bool: @dataclass class _DataHookSelector: - """Stores the info about the shared DataHooks within LightningModule and LightningDataModule. + """Stores the info about the shared DataHooks within ``LightningModule`` and ``LightningDataModule``. - The hook source can be + The hook source can be: - 1. a method from the :class:`~pytorch_lightning.core.module.LightningModule`, - 2. a method from the :class:`~pytorch_lightning.core.datamodule.LightningDataModule`, + 1. the :class:`~pytorch_lightning.core.module.LightningModule`, + 2. the :class:`~pytorch_lightning.core.datamodule.LightningDataModule`, Arguments: - model: A LightningModule - datamodule: A LightningDataModule + model: A ``LightningModule`` + datamodule: A ``LightningDataModule`` """ model: "pl.LightningModule" @@ -545,7 +551,7 @@ class _DataHookSelector: default=("on_before_batch_transfer", "transfer_batch_to_device", "on_after_batch_transfer") ) - def get_hook(self, hook_name: str) -> Callable: + def get_instance(self, hook_name: str) -> Union["pl.LightningModule", "pl.LightningDataModule"]: if hook_name not in self._valid_hooks: raise ValueError( f"`{hook_name}` is not a shared hook within `LightningModule` and `LightningDataModule`." @@ -553,7 +559,7 @@ def get_hook(self, hook_name: str) -> Callable: ) if self.datamodule is None: - return getattr(self.model, hook_name) + return self.model if is_overridden(hook_name, self.datamodule): if is_overridden(hook_name, self.model): @@ -561,11 +567,11 @@ def get_hook(self, hook_name: str) -> Callable: f"You have overridden `{hook_name}` in both `LightningModule` and `LightningDataModule`." " It will use the implementation from `LightningDataModule` instance." ) - return getattr(self.datamodule, hook_name) + return self.datamodule if is_overridden(hook_name, self.model): warning_cache.warn( f"You have overridden `{hook_name}` in `LightningModule` but have passed in a" " `LightningDataModule`. It will use the implementation from `LightningModule` instance." 
) - return getattr(self.model, hook_name) + return self.model diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py b/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py index 6f60ba6f1aa2f..56ad53ef4ba04 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/fx_validator.py @@ -44,6 +44,8 @@ class _LogOptions(TypedDict): allowed_on_step=(False, True), allowed_on_epoch=(False, True), default_on_step=True, default_on_epoch=False ), "lr_scheduler_step": None, + "configure_gradient_clipping": None, + "clip_gradients": None, "on_before_zero_grad": _LogOptions( allowed_on_step=(False, True), allowed_on_epoch=(False, True), default_on_step=True, default_on_epoch=False ), @@ -98,6 +100,9 @@ class _LogOptions(TypedDict): "on_epoch_end": _LogOptions( allowed_on_step=(False,), allowed_on_epoch=(True,), default_on_step=False, default_on_epoch=True ), + "on_before_batch_transfer": None, + "transfer_batch_to_device": None, + "on_after_batch_transfer": None, "on_batch_start": _LogOptions( allowed_on_step=(False, True), allowed_on_epoch=(False, True), default_on_step=True, default_on_epoch=False ), diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index ff882912625d0..02e17a8d93494 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -163,8 +163,7 @@ def update_train_epoch_metrics(self) -> None: self.log_metrics(self.metrics["log"]) # reset result collection for next epoch - assert self.trainer._results is not None - self.trainer._results.reset(metrics=True) + self.reset_results() """ Utilities and properties diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py index 9eb88fda4891e..a28599b5f20be 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/result.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/result.py @@ -525,7 +525,7 @@ def _get_cache(result_metric: _ResultMetric, on_step: bool) -> Optional[Tensor]: elif not on_step and result_metric.meta.on_epoch: if result_metric._computed is None: should = result_metric.meta.sync.should - if not result_metric.meta.sync.should and distributed_available(): + if not should and distributed_available() and result_metric.is_tensor: # ensure sync happens for FT since during a failure, the metrics are synced and saved to the # checkpoint, so during restart, metrics on rank 0 are from the accumulated ones from the previous # run, and on other ranks, they are 0. 
So we need to make sure they are synced in further training diff --git a/src/pytorch_lightning/utilities/data.py b/src/pytorch_lightning/utilities/data.py index 862c7f2de905b..b4d9d4dec5817 100644 --- a/src/pytorch_lightning/utilities/data.py +++ b/src/pytorch_lightning/utilities/data.py @@ -37,7 +37,7 @@ from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.apply_func import _is_dataclass_instance from pytorch_lightning.utilities.auto_restart import CaptureIterableDataset, CaptureMapDataset, FastForwardSampler -from pytorch_lightning.utilities.enums import _FaultTolerantMode +from pytorch_lightning.utilities.enums import _FaultTolerantMode, LightningEnum from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.rank_zero import rank_zero_warn from pytorch_lightning.utilities.seed import pl_worker_init_function @@ -48,6 +48,18 @@ warning_cache = WarningCache() +class _WrapAttrTag(LightningEnum): + SET = "set" + DEL = "del" + + def __call__(self, *args): + if self == self.SET: + fn = setattr + else: + fn = delattr + return fn(*args) + + def _extract_batch_size(batch: BType) -> Generator[int, None, None]: if isinstance(batch, Tensor): if batch.ndim == 0: @@ -188,27 +200,7 @@ def _update_dataloader( dataloader: DataLoader, sampler: Union[Sampler, Iterable], mode: Optional[RunningStage] = None ) -> DataLoader: dl_args, dl_kwargs = _get_dataloader_init_args_and_kwargs(dataloader, sampler, mode) - dl_cls = type(dataloader) - try: - dataloader = dl_cls(*dl_args, **dl_kwargs) - except TypeError as e: - # improve exception message due to an incorrect implementation of the `DataLoader` where multiple subclass - # `__init__` arguments map to one `DataLoader.__init__` argument - import re - - match = re.match(r".*__init__\(\) got multiple values .* '(\w+)'", str(e)) - if not match: - # an unexpected `TypeError`, continue failure - raise - argument = match.groups()[0] - message = ( - f"The {dl_cls.__name__} `DataLoader` implementation has an error where more than one `__init__` argument" - f" can be passed to its parent's `{argument}=...` `__init__` argument. This is likely caused by allowing" - f" passing both a custom argument that will map to the `{argument}` argument as well as `**kwargs`." - f" `kwargs` should be filtered to make sure they don't contain the `{argument}` key." - " This argument was automatically passed to your DataLoader by PyTorch Lightning." - ) - raise MisconfigurationException(message) from e + dataloader = _reinstantiate_wrapped_cls(dataloader, *dl_args, **dl_kwargs) return dataloader @@ -374,7 +366,7 @@ def _dataloader_init_kwargs_resolve_sampler( "this, expose an argument `sampler` in the `__init__` method of your custom class." 
) - batch_sampler = batch_sampler_cls(*args, **kwargs) + batch_sampler = _reinstantiate_wrapped_cls(batch_sampler, *args, **kwargs) else: try: batch_sampler = batch_sampler_cls( @@ -449,6 +441,37 @@ def _auto_add_worker_init_fn(dataloader: DataLoader, rank: int) -> None: dataloader.worker_init_fn = partial(pl_worker_init_function, rank=rank) +def _reinstantiate_wrapped_cls(orig_object: Any, *args: Any, explicit_cls: Optional[Type] = None, **kwargs: Any) -> Any: + constructor = type(orig_object) if explicit_cls is None else explicit_cls + + try: + result = constructor(*args, **kwargs) + except TypeError as e: + # improve exception message due to an incorrect implementation of the `DataLoader` where multiple subclass + # `__init__` arguments map to one `DataLoader.__init__` argument + import re + + match = re.match(r".*__init__\(\) got multiple values .* '(\w+)'", str(e)) + if not match: + # an unexpected `TypeError`, continue failure + raise + argument = match.groups()[0] + message = ( + f"The {constructor.__name__} implementation has an error where more than one `__init__` argument" + f" can be passed to its parent's `{argument}=...` `__init__` argument. This is likely caused by allowing" + f" passing both a custom argument that will map to the `{argument}` argument as well as `**kwargs`." + f" `kwargs` should be filtered to make sure they don't contain the `{argument}` key." + " This argument was automatically passed to your object by PyTorch Lightning." + ) + raise MisconfigurationException(message) from e + + attrs_record = getattr(orig_object, "__pl_attrs_record", list()) + for args, fn in attrs_record: + fn(result, *args) + + return result + + def _wrap_init_method(init: Callable, store_explicit_arg: Optional[str] = None) -> Callable: """Wraps the ``__init__`` method of classes (currently :class:`~torch.utils.data.DataLoader` and :class:`~torch.utils.data.BatchSampler`) in order to enable re-instantiation of custom subclasses.""" @@ -457,6 +480,8 @@ def _wrap_init_method(init: Callable, store_explicit_arg: Optional[str] = None) def wrapper(obj: Any, *args: Any, **kwargs: Any) -> None: # We need to inspect `init`, as inspecting `obj.__init__` # can lead to inspecting the wrong function with multiple inheritance + old_inside_init = getattr(obj, "__pl_inside_init", False) + object.__setattr__(obj, "__pl_inside_init", True) params = inspect.signature(init).parameters parameters_defaults = OrderedDict( @@ -474,21 +499,49 @@ def wrapper(obj: Any, *args: Any, **kwargs: Any) -> None: } if not hasattr(obj, "__pl_saved_args"): - obj.__pl_saved_args = args - obj.__pl_saved_kwargs = kwargs - obj.__pl_saved_arg_names = param_names - obj.__pl_saved_default_kwargs = default_kwargs + object.__setattr__(obj, "__pl_saved_args", args) + object.__setattr__(obj, "__pl_saved_kwargs", kwargs) + object.__setattr__(obj, "__pl_saved_arg_names", param_names) + object.__setattr__(obj, "__pl_saved_default_kwargs", default_kwargs) # We want to use the latest possible value for explicit argument (i.e. ideally what gets passed to base class) # so that we can be sure, that it will not get changed anymore. 
# That is why we are setting this in every `__init__` if store_explicit_arg is not None: if store_explicit_arg in param_names: - setattr(obj, f"__{store_explicit_arg}", args[param_names.index(store_explicit_arg)]) + object.__setattr__(obj, f"__{store_explicit_arg}", args[param_names.index(store_explicit_arg)]) elif store_explicit_arg in kwargs: - setattr(obj, f"__{store_explicit_arg}", kwargs[store_explicit_arg]) + object.__setattr__(obj, f"__{store_explicit_arg}", kwargs[store_explicit_arg]) init(obj, *args, **kwargs) + object.__setattr__(obj, "__pl_inside_init", old_inside_init) + + return wrapper + + +def _wrap_attr_method(method: Callable, tag: _WrapAttrTag) -> Callable: + """Wraps the ``__setattr__`` or ``__delattr__`` method of classes (currently :class:`~torch.utils.data.DataLoader` and + :class:`~torch.utils.data.BatchSampler`) in order to enable re-instantiation of custom subclasses.""" + + @functools.wraps(method) + def wrapper(obj: Any, *args: Any): + # First, let's find out if we're the first in inheritance chain calling the patched method. + name, *_ = args + prev_call_name, prev_call_method = getattr(obj, "__pl_current_call", (None, "method")) + first_call = not (prev_call_name == name and prev_call_method == tag) + + # Then mark the current called method + object.__setattr__(obj, "__pl_current_call", (name, tag)) + + # call original method + method(obj, *args) + if first_call and not getattr(obj, "__pl_inside_init", True): + # and save the value it was called with to the internal list, + # if we're outside of __init__ and the original call did not fail and we're the first call + attrs_record = getattr(obj, "__pl_attrs_record", list()) + attrs_record.append((args, tag)) + object.__setattr__(obj, "__pl_attrs_record", attrs_record) + object.__setattr__(obj, "__pl_current_call", (prev_call_name, prev_call_method)) return wrapper @@ -508,23 +561,34 @@ def recurse(cl: Type[Any]) -> None: @contextmanager -def _replace_init_method(base_cls: Type, store_explicit_arg: Optional[str] = None) -> Generator[None, None, None]: +def _replace_dunder_methods(base_cls: Type, store_explicit_arg: Optional[str] = None) -> Generator[None, None, None]: """This context manager is used to add support for re-instantiation of custom (subclasses) of `base_cls`. - It patches the ``__init__`` method. + It patches the ``__init__``, ``__setattr__`` and ``__delattr__`` methods. """ classes = _get_all_subclasses(base_cls) | {base_cls} - wrapped = set() for cls in classes: - if cls.__init__ not in wrapped: - cls._old_init = cls.__init__ + # Check that __init__ belongs to the class + # https://stackoverflow.com/a/5253424 + if "__init__" in cls.__dict__: + cls.__old__init__ = cls.__init__ cls.__init__ = _wrap_init_method(cls.__init__, store_explicit_arg) - wrapped.add(cls.__init__) + + # we want at least one setattr/delattr in the chain to be patched and it can happen, that none of the subclasses + # implement `__setattr__`/`__delattr__`. 
Therefore, we are always patching the `base_cls` + for patch_fn_name, tag in (("__setattr__", _WrapAttrTag.SET), ("__delattr__", _WrapAttrTag.DEL)): + if patch_fn_name in cls.__dict__ or cls is base_cls: + saved_name = f"__old{patch_fn_name}" + setattr(cls, saved_name, getattr(cls, patch_fn_name)) + setattr(cls, patch_fn_name, _wrap_attr_method(getattr(cls, patch_fn_name), tag)) yield for cls in classes: - if hasattr(cls, "_old_init"): - cls.__init__ = cls._old_init - del cls._old_init + for patched_name in ("__setattr__", "__delattr__", "__init__"): + # Check that __old__{init,setattr,delattr} belongs to the class + # https://stackoverflow.com/a/5253424 + if f"__old{patched_name}" in cls.__dict__: + setattr(cls, patched_name, getattr(cls, f"__old{patched_name}")) + delattr(cls, f"__old{patched_name}") def _wrap_with_capture_dataset(dataset: Dataset) -> Dataset: diff --git a/src/pytorch_lightning/utilities/imports.py b/src/pytorch_lightning/utilities/imports.py index 7784741ca87c1..96dd62982439a 100644 --- a/src/pytorch_lightning/utilities/imports.py +++ b/src/pytorch_lightning/utilities/imports.py @@ -124,6 +124,7 @@ def __repr__(self) -> str: _IS_WINDOWS = platform.system() == "Windows" _IS_INTERACTIVE = hasattr(sys, "ps1") # https://stackoverflow.com/a/64523765 _PYTHON_GREATER_EQUAL_3_8_0 = (sys.version_info.major, sys.version_info.minor) >= (3, 8) +_PYTHON_GREATER_EQUAL_3_10_0 = (sys.version_info.major, sys.version_info.minor) >= (3, 10) _TORCH_GREATER_EQUAL_1_9_1 = _compare_version("torch", operator.ge, "1.9.1") _TORCH_GREATER_EQUAL_1_10 = _compare_version("torch", operator.ge, "1.10.0") _TORCH_LESSER_EQUAL_1_10_2 = _compare_version("torch", operator.le, "1.10.2") diff --git a/src/pytorch_lightning/utilities/parsing.py b/src/pytorch_lightning/utilities/parsing.py index 9f5fe2d6b6841..b619c5cb698b0 100644 --- a/src/pytorch_lightning/utilities/parsing.py +++ b/src/pytorch_lightning/utilities/parsing.py @@ -160,7 +160,10 @@ def get_init_args(frame: types.FrameType) -> Dict[str, Any]: def collect_init_args( - frame: types.FrameType, path_args: List[Dict[str, Any]], inside: bool = False + frame: types.FrameType, + path_args: List[Dict[str, Any]], + inside: bool = False, + classes: Tuple[Type, ...] = (), ) -> List[Dict[str, Any]]: """Recursively collects the arguments passed to the child constructors in the inheritance tree. 
@@ -168,6 +171,7 @@ def collect_init_args( frame: the current stack frame path_args: a list of dictionaries containing the constructor args in all parent classes inside: track if we are inside inheritance path, avoid terminating too soon + classes: the classes in which to inspect the frames Return: A list of dictionaries where each dictionary contains the arguments passed to the @@ -179,13 +183,13 @@ def collect_init_args( if not isinstance(frame.f_back, types.FrameType): return path_args - if "__class__" in local_vars: + if "__class__" in local_vars and (not classes or issubclass(local_vars["__class__"], classes)): local_args = get_init_args(frame) # recursive update path_args.append(local_args) - return collect_init_args(frame.f_back, path_args, inside=True) + return collect_init_args(frame.f_back, path_args, inside=True, classes=classes) if not inside: - return collect_init_args(frame.f_back, path_args, inside) + return collect_init_args(frame.f_back, path_args, inside, classes=classes) return path_args @@ -223,7 +227,10 @@ def save_hyperparameters( init_args = {f.name: getattr(obj, f.name) for f in fields(obj)} else: init_args = {} - for local_args in collect_init_args(frame, []): + + from pytorch_lightning.core.mixins import HyperparametersMixin + + for local_args in collect_init_args(frame, [], classes=(HyperparametersMixin,)): init_args.update(local_args) if ignore is None: diff --git a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py index 859cf2fa98c0c..65a0fea2fb4a5 100644 --- a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py +++ b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py @@ -12,11 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
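Before the SWA tests below, a rough sketch of the composition scenario targeted by the `collect_init_args`/`save_hyperparameters` change above; the class names are hypothetical:

    import pytorch_lightning as pl

    class Inner(pl.LightningModule):
        def __init__(self, lr: float = 0.1):
            super().__init__()
            self.save_hyperparameters()

    class Wrapper:  # plain Python class, not a LightningModule/HyperparametersMixin
        def __init__(self, unrelated_arg: int = 0):
            super().__init__()
            self.model = Inner(lr=0.2)

    wrapper = Wrapper(unrelated_arg=5)
    # Only frames belonging to `HyperparametersMixin` subclasses are inspected now,
    # so the wrapper's constructor arguments should not leak into the hparams:
    print(wrapper.model.hparams)  # expected to contain only {"lr": 0.2}
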
import logging +import os +from pathlib import Path +from typing import ContextManager, Optional from unittest import mock import pytest import torch from torch import nn +from torch.optim.lr_scheduler import LambdaLR from torch.optim.swa_utils import SWALR from torch.utils.data import DataLoader @@ -30,7 +34,9 @@ class SwaTestModel(BoringModel): - def __init__(self, batchnorm: bool = True, interval: str = "epoch", iterable_dataset: bool = False): + def __init__( + self, batchnorm: bool = True, interval: str = "epoch", iterable_dataset: bool = False, crash_on_epoch=None + ): super().__init__() layers = [nn.Linear(32, 32)] if batchnorm: @@ -39,17 +45,18 @@ def __init__(self, batchnorm: bool = True, interval: str = "epoch", iterable_dat self.layer = nn.Sequential(*layers) self.interval = interval self.iterable_dataset = iterable_dataset + self.crash_on_epoch = crash_on_epoch def training_step(self, batch, batch_idx): + if self.crash_on_epoch and self.trainer.current_epoch >= self.crash_on_epoch: + raise Exception("SWA crash test") output = self.forward(batch) loss = self.loss(batch, output) return {"loss": loss} def train_dataloader(self): - dset_cls = RandomIterableDataset if self.iterable_dataset else RandomDataset dset = dset_cls(32, 64) - return DataLoader(dset, batch_size=2) def configure_optimizers(self): @@ -66,6 +73,8 @@ def configure_optimizers(self): class SwaTestCallback(StochasticWeightAveraging): update_parameters_calls: int = 0 transfer_weights_calls: int = 0 + # Record the first epoch, as if we are resuming from a checkpoint this may not be equal to 0 + first_epoch: Optional[int] = None def update_parameters(self, *args, **kwargs): self.update_parameters_calls += 1 @@ -77,6 +86,11 @@ def transfer_weights(self, *args, **kwargs): def on_train_epoch_start(self, trainer, *args): super().on_train_epoch_start(trainer, *args) + if self.first_epoch is None and not trainer.fit_loop.restarting: + # since the checkpoint loaded was saved `on_train_epoch_end`, the first `FitLoop` iteration will + # not update the model and just call the epoch-level hooks, for that reason, we check that we are not + # restarting before choosing the first epoch + self.first_epoch = trainer.current_epoch assert trainer.fit_loop._skip_backward == (trainer.current_epoch > self.swa_end) if self.swa_start <= trainer.current_epoch: assert isinstance(trainer.lr_scheduler_configs[0].scheduler, SWALR) @@ -88,6 +102,7 @@ def on_train_epoch_end(self, trainer, *args): if self.swa_start <= trainer.current_epoch <= self.swa_end: swa_epoch = trainer.current_epoch - self.swa_start assert self.n_averaged == swa_epoch + 1 + assert self._swa_scheduler is not None # Scheduler is stepped once on initialization and then at the end of each epoch assert self._swa_scheduler._step_count == swa_epoch + 2 elif trainer.current_epoch > self.swa_end: @@ -103,10 +118,13 @@ def on_train_end(self, trainer, pl_module): if not isinstance(trainer.strategy, DDPSpawnStrategy): # check backward call count. 
the batchnorm update epoch should not backward - assert trainer.strategy.backward.call_count == trainer.max_epochs * trainer.limit_train_batches + assert trainer.strategy.backward.call_count == ( + (trainer.max_epochs - self.first_epoch) * trainer.limit_train_batches + ) # check call counts - assert self.update_parameters_calls == trainer.max_epochs - (self._swa_epoch_start - 1) + first_swa_epoch = max(self.first_epoch, self.swa_start) + assert self.update_parameters_calls == trainer.max_epochs - first_swa_epoch assert self.transfer_weights_calls == 1 @@ -140,7 +158,7 @@ def train_with_swa( devices=devices, ) - with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward): + with _backward_patch(trainer): trainer.fit(model) # check the model is the expected @@ -226,9 +244,10 @@ def test_swa_multiple_lrs(tmpdir): class TestModel(BoringModel): def __init__(self): - super(BoringModel, self).__init__() + super().__init__() self.layer1 = torch.nn.Linear(32, 32) self.layer2 = torch.nn.Linear(32, 2) + self.on_train_epoch_start_called = False def forward(self, x): x = self.layer1(x) @@ -255,3 +274,98 @@ def on_train_epoch_start(self): ) trainer.fit(model) assert model.on_train_epoch_start_called + + +def _swa_resume_training_from_checkpoint(tmpdir, model, resume_model, ddp=False): + swa_start = 3 + trainer_kwargs = { + "default_root_dir": tmpdir, + "max_epochs": 5, + "accelerator": "cpu", + "strategy": "ddp_spawn_find_unused_parameters_false" if ddp else None, + "devices": 2 if ddp else 1, + "limit_train_batches": 5, + "limit_val_batches": 0, + "accumulate_grad_batches": 2, + "enable_progress_bar": False, + } + trainer = Trainer(callbacks=SwaTestCallback(swa_epoch_start=swa_start, swa_lrs=0.1), **trainer_kwargs) + + with _backward_patch(trainer), pytest.raises(Exception, match="SWA crash test"): + trainer.fit(model) + + checkpoint_dir = Path(tmpdir) / "lightning_logs" / "version_0" / "checkpoints" + checkpoint_files = os.listdir(checkpoint_dir) + assert len(checkpoint_files) == 1 + ckpt_path = str(checkpoint_dir / checkpoint_files[0]) + + trainer = Trainer(callbacks=SwaTestCallback(swa_epoch_start=swa_start, swa_lrs=0.1), **trainer_kwargs) + + with _backward_patch(trainer): + trainer.fit(resume_model, ckpt_path=ckpt_path) + + +class CustomSchedulerModel(SwaTestModel): + def configure_optimizers(self): + optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.1) + + def lr_lambda(current_step: int): + return 0.1 + + scheduler = LambdaLR(optimizer, lr_lambda, -1) + return { + "optimizer": optimizer, + "lr_scheduler": { + "scheduler": scheduler, + "interval": self.interval, + }, + } + + +@pytest.mark.parametrize("crash_on_epoch", [1, 3]) +def test_swa_resume_training_from_checkpoint(tmpdir, crash_on_epoch): + model = SwaTestModel(crash_on_epoch=crash_on_epoch) + resume_model = SwaTestModel() + _swa_resume_training_from_checkpoint(tmpdir, model, resume_model) + + +@pytest.mark.parametrize("crash_on_epoch", [1, 3]) +def test_swa_resume_training_from_checkpoint_custom_scheduler(tmpdir, crash_on_epoch): + # Reproduces the bug reported in https://github.com/PyTorchLightning/pytorch-lightning/issues/11665 + model = CustomSchedulerModel(crash_on_epoch=crash_on_epoch) + resume_model = CustomSchedulerModel() + _swa_resume_training_from_checkpoint(tmpdir, model, resume_model) + + +@RunIf(skip_windows=True) +def test_swa_resume_training_from_checkpoint_ddp(tmpdir): + model = SwaTestModel(crash_on_epoch=3) + resume_model = SwaTestModel() + _swa_resume_training_from_checkpoint(tmpdir, model, 
resume_model, ddp=True) + + +@pytest.mark.parametrize( + "strategy", + [ + pytest.param("fsdp", marks=RunIf(fairscale_fully_sharded=True, min_cuda_gpus=1)), + pytest.param("deepspeed", marks=RunIf(deepspeed=True, min_cuda_gpus=1)), + ], +) +def test_misconfiguration_error_with_sharded_model(tmpdir, strategy: str): + model = SwaTestModel() + swa_callback = SwaTestCallback(swa_epoch_start=2, swa_lrs=0.1) + trainer = Trainer( + default_root_dir=tmpdir, + enable_progress_bar=False, + max_epochs=5, + callbacks=[swa_callback], + strategy=strategy, + accelerator="gpu", + devices=1, + ) + with pytest.raises(MisconfigurationException, match="SWA does not currently support sharded models"): + trainer.fit(model) + + +def _backward_patch(trainer: Trainer) -> ContextManager: + return mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) diff --git a/tests/tests_pytorch/core/test_metric_result_integration.py b/tests/tests_pytorch/core/test_metric_result_integration.py index cb8a51c5bf9ba..9672bb75b51f1 100644 --- a/tests/tests_pytorch/core/test_metric_result_integration.py +++ b/tests/tests_pytorch/core/test_metric_result_integration.py @@ -21,9 +21,11 @@ import torch import torch.distributed as dist import torch.multiprocessing as mp +import torchmetrics from torch.nn import ModuleDict, ModuleList from torchmetrics import Metric, MetricCollection +import pytorch_lightning as pl import tests_pytorch.helpers.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint @@ -666,14 +668,22 @@ def on_train_start(self): @pytest.mark.parametrize("distributed_env", [True, False]) -def test_logger_sync_dist(distributed_env): - # self.log('bar', 7, ..., sync_dist=False) +@pytest.mark.parametrize("log_val", [torch.tensor(0.5), torchmetrics.Accuracy()]) +def test_logger_sync_dist(distributed_env, log_val): + pl.trainer.connectors.logger_connector.result.warning_cache.clear() + + # self.log('bar', 0.5, ..., sync_dist=False) meta = _Metadata("foo", "bar") meta.sync = _Sync(_should=False) - result_metric = _ResultMetric(metadata=meta, is_tensor=True) - result_metric.update(torch.tensor(7.0), 10) + is_tensor = isinstance(log_val, torch.Tensor) + + if not is_tensor: + log_val.update(torch.tensor([0, 1]), torch.tensor([0, 0], dtype=torch.long)) + + result_metric = _ResultMetric(metadata=meta, is_tensor=is_tensor) + result_metric.update(log_val, 10) - warning_ctx = pytest.warns if distributed_env else no_warning_call + warning_ctx = pytest.warns if distributed_env and is_tensor else no_warning_call with mock.patch( "pytorch_lightning.trainer.connectors.logger_connector.result.distributed_available", @@ -681,4 +691,4 @@ def test_logger_sync_dist(distributed_env): ): with warning_ctx(PossibleUserWarning, match=r"recommended to use `self.log\('bar', ..., sync_dist=True\)`"): value = _ResultCollection._get_cache(result_metric, on_step=False) - assert value == 7.0 + assert value == 0.5 diff --git a/tests/tests_pytorch/lite/test_lite.py b/tests/tests_pytorch/lite/test_lite.py index 2215ab3129780..d45046f249d54 100644 --- a/tests/tests_pytorch/lite/test_lite.py +++ b/tests/tests_pytorch/lite/test_lite.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import contextlib import os from copy import deepcopy from unittest import mock @@ -30,7 +29,6 @@ from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy from pytorch_lightning.utilities import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _RequirementAvailable from pytorch_lightning.utilities.seed import pl_worker_init_function from tests_pytorch.helpers.runif import RunIf @@ -179,7 +177,7 @@ def test_setup_dataloaders_return_type(): assert lite_dataloader1.dataset is dataset1 -@mock.patch("pytorch_lightning.lite.lite._replace_init_method") +@mock.patch("pytorch_lightning.lite.lite._replace_dunder_methods") def test_setup_dataloaders_captures_dataloader_arguments(ctx_manager): """Test that Lite intercepts the DataLoader constructor arguments with a context manager in its run method.""" @@ -480,13 +478,4 @@ def run(self): assert self.broadcast(True) assert self.is_global_zero == (self.local_rank == 0) - if _RequirementAvailable("deepspeed>=0.6.5"): - # https://github.com/microsoft/DeepSpeed/issues/2139 - raise_if_deepspeed_incompatible = pytest.raises( - RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite" - ) - else: - raise_if_deepspeed_incompatible = contextlib.suppress() - - with raise_if_deepspeed_incompatible: - Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run() + Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run() diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index d613296abccf5..612d7bf035c2f 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -300,7 +300,7 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): @pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES_WO_NEPTUNE_WANDB) -@RunIf(skip_windows=True, skip_hanging_spawn=True) +@RunIf(skip_windows=True) def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class): """Test that loggers get replaced by dummy loggers on global rank > 0.""" _patch_comet_atexit(monkeypatch) diff --git a/tests/tests_pytorch/loggers/test_wandb.py b/tests/tests_pytorch/loggers/test_wandb.py index fbc1d5e189637..648e1a8f38ec8 100644 --- a/tests/tests_pytorch/loggers/test_wandb.py +++ b/tests/tests_pytorch/loggers/test_wandb.py @@ -25,6 +25,16 @@ from tests_pytorch.helpers.utils import no_warning_call +@mock.patch("pytorch_lightning.loggers.wandb.Run", new=mock.Mock) +@mock.patch("pytorch_lightning.loggers.wandb.wandb") +def test_wandb_project_name(*_): + logger = WandbLogger() + assert logger.name == "lightning_logs" + + logger = WandbLogger(project="project") + assert logger.name == "project" + + @mock.patch("pytorch_lightning.loggers.wandb.Run", new=mock.Mock) @mock.patch("pytorch_lightning.loggers.wandb.wandb") def test_wandb_logger_init(wandb, monkeypatch): @@ -48,7 +58,7 @@ def test_wandb_logger_init(wandb, monkeypatch): wandb.init.reset_mock() WandbLogger(project="test_project").experiment wandb.init.assert_called_once_with( - name="test_project", dir=None, id=None, project="test_project", resume="allow", anonymous=None + name=None, dir=None, id=None, project="test_project", resume="allow", anonymous=None ) # test wandb.init and setting logger experiment externally @@ -91,7 +101,6 @@ def test_wandb_logger_init(wandb, monkeypatch): 
logger.watch("model", "log", 10, False) wandb.init().watch.assert_called_once_with("model", log="log", log_freq=10, log_graph=False) - assert logger.name == wandb.init().name assert logger.version == wandb.init().id @@ -140,10 +149,9 @@ def test_wandb_logger_dirs_creation(wandb, monkeypatch, tmpdir): """Test that the logger creates the folders and files in the right place.""" monkeypatch.setattr(pytorch_lightning.loggers.wandb, "_WANDB_GREATER_EQUAL_0_12_10", True) wandb.run = None - logger = WandbLogger(save_dir=str(tmpdir), offline=True) + logger = WandbLogger(project="project", save_dir=str(tmpdir), offline=True) # the logger get initialized assert logger.version == wandb.init().id - assert logger.name == wandb.init().name # mock return values of experiment wandb.run = None @@ -154,7 +162,7 @@ def test_wandb_logger_dirs_creation(wandb, monkeypatch, tmpdir): _ = logger.experiment assert logger.version == "1" - assert logger.name == "run_name" + assert logger.name == "project" assert str(tmpdir) == logger.save_dir assert not os.listdir(tmpdir) @@ -164,7 +172,7 @@ def test_wandb_logger_dirs_creation(wandb, monkeypatch, tmpdir): assert trainer.log_dir == logger.save_dir trainer.fit(model) - assert trainer.checkpoint_callback.dirpath == str(tmpdir / "run_name" / version / "checkpoints") + assert trainer.checkpoint_callback.dirpath == str(tmpdir / "project" / version / "checkpoints") assert set(os.listdir(trainer.checkpoint_callback.dirpath)) == {"epoch=0-step=3.ckpt"} assert trainer.log_dir == logger.save_dir diff --git a/tests/tests_pytorch/models/test_hparams.py b/tests/tests_pytorch/models/test_hparams.py index c064d0f8c055e..90d9d1eb0e902 100644 --- a/tests/tests_pytorch/models/test_hparams.py +++ b/tests/tests_pytorch/models/test_hparams.py @@ -29,6 +29,7 @@ from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.datamodule import LightningDataModule +from pytorch_lightning.core.mixins import HyperparametersMixin from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel, RandomDataset from pytorch_lightning.utilities import _HYDRA_EXPERIMENTAL_AVAILABLE, _OMEGACONF_AVAILABLE, AttributeDict, is_picklable @@ -401,6 +402,24 @@ def _raw_checkpoint_path(trainer) -> str: return raw_checkpoint_path +@pytest.mark.parametrize("base_class", (HyperparametersMixin, LightningModule, LightningDataModule)) +def test_save_hyperparameters_under_composition(base_class): + """Test that in a composition where the parent is not a Lightning-like module, the parent's arguments don't get + collected.""" + + class ChildInComposition(base_class): + def __init__(self, same_arg): + super().__init__() + self.save_hyperparameters() + + class NotPLSubclass: # intentionally not subclassing LightningModule/LightningDataModule + def __init__(self, same_arg="parent_default", other_arg="other"): + self.child = ChildInComposition(same_arg="cocofruit") + + parent = NotPLSubclass() + assert parent.child.hparams == dict(same_arg="cocofruit") + + class LocalVariableModelSuperLast(BoringModel): """This model has the super().__init__() call at the end.""" diff --git a/tests/tests_pytorch/run_standalone_tasks.sh b/tests/tests_pytorch/run_standalone_tasks.sh index 960bd867ceaa4..698ed7863ab96 100644 --- a/tests/tests_pytorch/run_standalone_tasks.sh +++ b/tests/tests_pytorch/run_standalone_tasks.sh @@ -34,6 +34,10 @@ fi # test that a user can 
manually launch individual processes echo "Running manual ddp launch test" export PYTHONPATH="${PYTHONPATH}:$(pwd)" -args="--trainer.accelerator gpu --trainer.devices 2 --trainer.strategy ddp --trainer.max_epochs=1 --trainer.limit_train_batches=1 --trainer.limit_val_batches=1 --trainer.limit_test_batches=1" -MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=1 python ../../examples/convert_from_pt_to_pl/image_classifier_5_lightning_datamodule.py ${args} & -MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=0 python ../../examples/convert_from_pt_to_pl/image_classifier_5_lightning_datamodule.py ${args} +args="fit --trainer.accelerator gpu --trainer.devices 2 --trainer.strategy ddp --trainer.max_epochs=1 --trainer.limit_train_batches=1 --trainer.limit_val_batches=1 --trainer.limit_test_batches=1" +MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=1 python strategies/scripts/cli_script.py ${args} & +MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=0 python strategies/scripts/cli_script.py ${args} + +# test that ddp can be launched as a module (-m option) +echo "Running ddp example as module" +python -m strategies.scripts.cli_script ${args} diff --git a/tests/tests_pytorch/serve/__init__.py b/tests/tests_pytorch/serve/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tests_pytorch/strategies/ddp_model.py b/tests/tests_pytorch/strategies/ddp_model.py deleted file mode 100644 index 76d1f3f2f6866..0000000000000 --- a/tests/tests_pytorch/strategies/ddp_model.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-"""Runs either `.fit()` or `.test()` on a single node across multiple gpus.""" -import os -from argparse import ArgumentParser - -import torch - -from pytorch_lightning import seed_everything, Trainer -from tests_pytorch.helpers.datamodules import ClassifDataModule -from tests_pytorch.helpers.simple_models import ClassificationModel - - -def main(): - seed_everything(4321) - - parser = ArgumentParser(add_help=False) - parser = Trainer.add_argparse_args(parser) - parser.add_argument("--trainer_method", default="fit") - parser.add_argument("--tmpdir") - parser.add_argument("--workdir") - parser.set_defaults(accelerator="gpu", devices=2) - parser.set_defaults(strategy="ddp") - args = parser.parse_args() - - dm = ClassifDataModule() - model = ClassificationModel() - trainer = Trainer.from_argparse_args(args) - - if args.trainer_method == "fit": - trainer.fit(model, datamodule=dm) - result = None - elif args.trainer_method == "test": - result = trainer.test(model, datamodule=dm) - elif args.trainer_method == "fit_test": - trainer.fit(model, datamodule=dm) - result = trainer.test(model, datamodule=dm) - else: - raise ValueError(f"Unsupported: {args.trainer_method}") - - result_ext = {"status": "complete", "method": args.trainer_method, "result": result} - file_path = os.path.join(args.tmpdir, "ddp.result") - torch.save(result_ext, file_path) - - -if __name__ == "__main__": - main() diff --git a/tests/tests_pytorch/strategies/scripts/__init__.py b/tests/tests_pytorch/strategies/scripts/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tests_pytorch/strategies/scripts/cli_script.py b/tests/tests_pytorch/strategies/scripts/cli_script.py new file mode 100644 index 0000000000000..17f0d29392eb9 --- /dev/null +++ b/tests/tests_pytorch/strategies/scripts/cli_script.py @@ -0,0 +1,24 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""A trivial script that wraps a LightningCLI around the BoringModel and BoringDataModule.""" +from pytorch_lightning.cli import LightningCLI +from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel + +if __name__ == "__main__": + LightningCLI( + BoringModel, + BoringDataModule, + seed_everything_default=42, + save_config_overwrite=True, + ) diff --git a/tests/tests_pytorch/strategies/test_ddp.py b/tests/tests_pytorch/strategies/test_ddp.py index 1a2a0475e7ed6..9b196f3e2a97f 100644 --- a/tests/tests_pytorch/strategies/test_ddp.py +++ b/tests/tests_pytorch/strategies/test_ddp.py @@ -21,60 +21,41 @@ from torch.nn.parallel.distributed import DistributedDataParallel import pytorch_lightning as pl -from pytorch_lightning import Trainer +from pytorch_lightning import seed_everything, Trainer from pytorch_lightning.callbacks import Callback from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.strategies import DDPStrategy +from tests_pytorch.helpers.datamodules import ClassifDataModule from tests_pytorch.helpers.runif import RunIf -from tests_pytorch.strategies import ddp_model -from tests_pytorch.utilities.distributed import call_training_script +from tests_pytorch.helpers.simple_models import ClassificationModel -CLI_ARGS = "--max_epochs 1 --accelerator gpu --devices 2 --strategy ddp" +@RunIf(min_cuda_gpus=2, standalone=True) +def test_multi_gpu_model_ddp_fit_only(tmpdir): + dm = ClassifDataModule() + model = ClassificationModel() + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, accelerator="gpu", devices=2, strategy="ddp") + trainer.fit(model, datamodule=dm) -@RunIf(min_cuda_gpus=2) -@pytest.mark.parametrize("as_module", [True, False]) -def test_multi_gpu_model_ddp_fit_only(tmpdir, as_module): - # call the script - call_training_script(ddp_model, CLI_ARGS, "fit", tmpdir, timeout=120, as_module=as_module) - # load the results of the script - result_path = os.path.join(tmpdir, "ddp.result") - result = torch.load(result_path) +@RunIf(min_cuda_gpus=2, standalone=True) +def test_multi_gpu_model_ddp_test_only(tmpdir): + dm = ClassifDataModule() + model = ClassificationModel() + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, accelerator="gpu", devices=2, strategy="ddp") + trainer.test(model, datamodule=dm) - # verify the file wrote the expected outputs - assert result["status"] == "complete" +@RunIf(min_cuda_gpus=2, standalone=True) +def test_multi_gpu_model_ddp_fit_test(tmpdir): + seed_everything(4321) + dm = ClassifDataModule() + model = ClassificationModel() + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, accelerator="gpu", devices=2, strategy="ddp") + trainer.fit(model, datamodule=dm) + result = trainer.test(model, datamodule=dm) -@RunIf(min_cuda_gpus=2) -@pytest.mark.parametrize("as_module", [True, False]) -def test_multi_gpu_model_ddp_test_only(tmpdir, as_module): - # call the script - call_training_script(ddp_model, CLI_ARGS, "test", tmpdir, as_module=as_module) - - # load the results of the script - result_path = os.path.join(tmpdir, "ddp.result") - result = torch.load(result_path) - - # verify the file wrote the expected outputs - assert result["status"] == "complete" - - -@RunIf(min_cuda_gpus=2) -@pytest.mark.parametrize("as_module", [True, False]) -def test_multi_gpu_model_ddp_fit_test(tmpdir, as_module): - # call the script - call_training_script(ddp_model, CLI_ARGS, "fit_test", tmpdir, timeout=20, as_module=as_module) - - # load the results of the script - result_path = os.path.join(tmpdir, 
"ddp.result") - result = torch.load(result_path) - - # verify the file wrote the expected outputs - assert result["status"] == "complete" - - model_outs = result["result"] - for out in model_outs: + for out in result: assert out["test_acc"] > 0.7 diff --git a/tests/tests_pytorch/strategies/test_ddp_fully_sharded_native.py b/tests/tests_pytorch/strategies/test_ddp_fully_sharded_native.py index 74f9534c47ce3..ede201da1f68f 100644 --- a/tests/tests_pytorch/strategies/test_ddp_fully_sharded_native.py +++ b/tests/tests_pytorch/strategies/test_ddp_fully_sharded_native.py @@ -7,7 +7,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.fsdp_native_native_amp import FullyShardedNativeNativeMixedPrecisionPlugin from pytorch_lightning.strategies import DDPFullyShardedNativeStrategy from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_12 @@ -35,7 +35,7 @@ def test_invalid_on_cpu(tmpdir): @RunIf(min_torch="1.12", min_cuda_gpus=1) @pytest.mark.parametrize("precision, expected", [(16, torch.float16), ("bf16", torch.bfloat16)]) def test_precision_plugin_config(precision, expected): - plugin = FullyShardedNativeMixedPrecisionPlugin(precision=precision, device="cuda") + plugin = FullyShardedNativeNativeMixedPrecisionPlugin(precision=precision, device="cuda") config = plugin.mixed_precision_config assert config.param_dtype == expected assert config.buffer_dtype == expected @@ -96,6 +96,7 @@ def on_predict_batch_end(self, outputs: Optional[Any], batch: Any, batch_idx: in def _assert_layer_fsdp_instance(self) -> None: assert isinstance(self.layer, FullyShardedDataParallel) + assert isinstance(self.trainer.strategy.precision_plugin, FullyShardedNativeNativeMixedPrecisionPlugin) assert isinstance(self.layer.module[0], FullyShardedDataParallel) assert isinstance(self.layer.module[2], FullyShardedDataParallel) # root should not be resharding diff --git a/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py b/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py index f485060833320..7fb22206c45c6 100644 --- a/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py +++ b/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py @@ -184,11 +184,17 @@ def test_ddp_spawn_strategy_set_timeout(mock_init_process_group): "strategy_name,expected_ddp_kwargs", [ ("ddp_spawn", {}), - ("ddp_fork", {}), - ("ddp_notebook", {}), + pytest.param("ddp_fork", {}, marks=RunIf(skip_windows=True)), + pytest.param("ddp_notebook", {}, marks=RunIf(skip_windows=True)), ("ddp_spawn_find_unused_parameters_false", {"find_unused_parameters": False}), - ("ddp_fork_find_unused_parameters_false", {"find_unused_parameters": False}), - ("ddp_notebook_find_unused_parameters_false", {"find_unused_parameters": False}), + pytest.param( + "ddp_fork_find_unused_parameters_false", {"find_unused_parameters": False}, marks=RunIf(skip_windows=True) + ), + pytest.param( + "ddp_notebook_find_unused_parameters_false", + {"find_unused_parameters": False}, + marks=RunIf(skip_windows=True), + ), ], ) def test_ddp_kwargs_from_registry(strategy_name, expected_ddp_kwargs): diff --git a/tests/tests_pytorch/trainer/connectors/test_callback_connector.py 
b/tests/tests_pytorch/trainer/connectors/test_callback_connector.py index d6d5018aa1dd0..02e846425a2a0 100644 --- a/tests/tests_pytorch/trainer/connectors/test_callback_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_callback_connector.py @@ -30,7 +30,7 @@ ) from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.trainer.connectors.callback_connector import CallbackConnector -from pytorch_lightning.utilities.imports import _PYTHON_GREATER_EQUAL_3_8_0 +from pytorch_lightning.utilities.imports import _PYTHON_GREATER_EQUAL_3_8_0, _PYTHON_GREATER_EQUAL_3_10_0 def test_checkpoint_callbacks_are_last(tmpdir): @@ -265,7 +265,10 @@ def _make_entry_point_query_mock(callback_factory): entry_point = Mock() entry_point.name = "mocked" entry_point.load.return_value = callback_factory - if _PYTHON_GREATER_EQUAL_3_8_0: + if _PYTHON_GREATER_EQUAL_3_10_0: + query_mock.return_value = [entry_point] + import_path = "importlib.metadata.entry_points" + elif _PYTHON_GREATER_EQUAL_3_8_0: query_mock().get.return_value = [entry_point] import_path = "importlib.metadata.entry_points" else: diff --git a/tests/tests_pytorch/trainer/connectors/test_data_connector.py b/tests/tests_pytorch/trainer/connectors/test_data_connector.py index 52ef4c4db6d8d..379a3248a1535 100644 --- a/tests/tests_pytorch/trainer/connectors/test_data_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_data_connector.py @@ -445,7 +445,8 @@ def test_dataloader_source_direct_access(): def test_dataloader_source_request_from_module(): """Test requesting a dataloader from a module works.""" module = BoringModel() - module.trainer = Trainer() + trainer = Trainer() + module.trainer = trainer module.foo = Mock(return_value=module.train_dataloader()) source = _DataLoaderSource(module, "foo") @@ -470,34 +471,34 @@ def test_no_datamodule_no_overridden(self, hook_name): model, _, trainer = self.reset_instances() trainer._data_connector.attach_datamodule(model, datamodule=None) with no_warning_call(match=f"have overridden `{hook_name}` in"): - hook = trainer._data_connector._datahook_selector.get_hook(hook_name) + instance = trainer._data_connector._datahook_selector.get_instance(hook_name) - assert hook == getattr(model, hook_name) + assert instance is model def test_with_datamodule_no_overridden(self, hook_name): model, dm, trainer = self.reset_instances() trainer._data_connector.attach_datamodule(model, datamodule=dm) with no_warning_call(match=f"have overridden `{hook_name}` in"): - hook = trainer._data_connector._datahook_selector.get_hook(hook_name) + instance = trainer._data_connector._datahook_selector.get_instance(hook_name) - assert hook == getattr(model, hook_name) + assert instance is model def test_override_model_hook(self, hook_name): model, dm, trainer = self.reset_instances() trainer._data_connector.attach_datamodule(model, datamodule=dm) with no_warning_call(match=f"have overridden `{hook_name}` in"): - hook = trainer._data_connector._datahook_selector.get_hook(hook_name) + instance = trainer._data_connector._datahook_selector.get_instance(hook_name) - assert hook == getattr(model, hook_name) + assert instance is model def test_override_datamodule_hook(self, hook_name): model, dm, trainer = self.reset_instances() trainer._data_connector.attach_datamodule(model, datamodule=dm) setattr(dm, hook_name, self.overridden_func) with no_warning_call(match=f"have overridden `{hook_name}` in"): - hook = trainer._data_connector._datahook_selector.get_hook(hook_name) + instance = 
trainer._data_connector._datahook_selector.get_instance(hook_name) - assert hook == getattr(dm, hook_name) + assert instance is dm def test_override_both_model_and_datamodule(self, hook_name): model, dm, trainer = self.reset_instances() @@ -505,39 +506,40 @@ def test_override_both_model_and_datamodule(self, hook_name): setattr(model, hook_name, self.overridden_func) setattr(dm, hook_name, self.overridden_func) with pytest.warns(UserWarning, match=f"have overridden `{hook_name}` in both"): - hook = trainer._data_connector._datahook_selector.get_hook(hook_name) + instance = trainer._data_connector._datahook_selector.get_instance(hook_name) - assert hook == getattr(dm, hook_name) + assert instance is dm def test_with_datamodule_override_model(self, hook_name): model, dm, trainer = self.reset_instances() trainer._data_connector.attach_datamodule(model, datamodule=dm) setattr(model, hook_name, self.overridden_func) with pytest.warns(UserWarning, match=f"have overridden `{hook_name}` in `LightningModule`"): - hook = trainer._data_connector._datahook_selector.get_hook(hook_name) + instance = trainer._data_connector._datahook_selector.get_instance(hook_name) - assert hook == getattr(model, hook_name) + assert instance is model def test_invalid_hook_passed_in_datahook_selector(): dh_selector = _DataHookSelector(BoringModel(), None) with pytest.raises(ValueError, match="is not a shared hook"): - dh_selector.get_hook("setup") + dh_selector.get_instance("setup") -def test_eval_distributed_sampler_warning(tmpdir): +@pytest.mark.parametrize("devices, warn_context", [(1, no_warning_call), (2, pytest.warns)]) +def test_eval_distributed_sampler_warning(devices, warn_context): """Test that a warning is raised when `DistributedSampler` is used with evaluation.""" model = BoringModel() - trainer = Trainer(strategy="ddp", devices=2, accelerator="cpu", fast_dev_run=True) + trainer = Trainer(strategy="ddp", devices=devices, accelerator="cpu") trainer._data_connector.attach_data(model) trainer.state.fn = TrainerFn.VALIDATING - with pytest.warns(PossibleUserWarning, match="multi-device settings use `DistributedSampler`"): + with warn_context(PossibleUserWarning, match="multi-device settings use `DistributedSampler`"): trainer.reset_val_dataloader(model) trainer.state.fn = TrainerFn.TESTING - with pytest.warns(PossibleUserWarning, match="multi-device settings use `DistributedSampler`"): + with warn_context(PossibleUserWarning, match="multi-device settings use `DistributedSampler`"): trainer.reset_test_dataloader(model) diff --git a/tests/tests_pytorch/trainer/logging_/test_logger_connector.py b/tests/tests_pytorch/trainer/logging_/test_logger_connector.py index 760e8eea2a85c..c2be22c61244b 100644 --- a/tests/tests_pytorch/trainer/logging_/test_logger_connector.py +++ b/tests/tests_pytorch/trainer/logging_/test_logger_connector.py @@ -187,11 +187,6 @@ def __init__(self, not_supported): { "log", "log_dict", - # the following are problematic as they do have `self._current_fx_name` defined some times but - # not others depending on where they were called. 
So we cannot reliably `self.log` in them - "on_before_batch_transfer", - "transfer_batch_to_device", - "on_after_batch_transfer", } ) # remove `nn.Module` hooks @@ -227,6 +222,9 @@ def test_fx_validator_integration(tmpdir): "on_pretrain_routine_end": "You can't", "train_dataloader": "You can't", "val_dataloader": "You can't", + "on_before_batch_transfer": "You can't", + "transfer_batch_to_device": "You can't", + "on_after_batch_transfer": "You can't", "on_validation_end": "You can't", "on_train_end": "You can't", "on_fit_end": "You can't", @@ -238,6 +236,8 @@ def test_fx_validator_integration(tmpdir): "on_validation_model_eval": "You can't", "on_validation_model_train": "You can't", "lr_scheduler_step": "You can't", + "configure_gradient_clipping": "You can't", + "clip_gradients": "You can't", "on_save_checkpoint": "You can't", "on_load_checkpoint": "You can't", "on_exception": "You can't", diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py index 5855eba4c86af..d16be306b9365 100644 --- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py @@ -569,11 +569,12 @@ def on_train_epoch_end(self, trainer, pl_module): "accelerator", [ pytest.param("gpu", marks=RunIf(min_cuda_gpus=1)), + "cpu", ], ) def test_metric_are_properly_reduced(tmpdir, accelerator): class TestingModel(BoringModel): - def __init__(self, *args, **kwargs) -> None: + def __init__(self) -> None: super().__init__() self.val_acc = Accuracy() @@ -592,7 +593,6 @@ def validation_step(self, batch, batch_idx): return super().validation_step(batch, batch_idx) early_stop = EarlyStopping(monitor="val_acc", mode="max") - checkpoint = ModelCheckpoint(monitor="val_acc", save_last=True, save_top_k=2, mode="max") model = TestingModel() @@ -812,3 +812,28 @@ def training_step(self, batch, batch_idx): call(metrics={"foo_epoch": 0.0, "epoch": 1}, step=3), ] ) + + +@mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics") +def test_log_on_train_start(mock_log_metrics, tmpdir): + """Tests that logged metrics on_train_start get reset after the first epoch.""" + + class MyModel(BoringModel): + def on_train_start(self): + self.log("foo", 123) + + model = MyModel() + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=1, + limit_val_batches=0, + max_epochs=2, + log_every_n_steps=1, + enable_model_summary=False, + enable_checkpointing=False, + enable_progress_bar=False, + ) + trainer.fit(model) + + assert mock_log_metrics.mock_calls == [call(metrics={"foo": 123.0, "epoch": 0}, step=0)] + assert trainer.max_epochs > 1 diff --git a/tests/tests_pytorch/utilities/distributed.py b/tests/tests_pytorch/utilities/distributed.py deleted file mode 100644 index 38a50edcc7177..0000000000000 --- a/tests/tests_pytorch/utilities/distributed.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import subprocess -import sys -from pathlib import Path -from subprocess import TimeoutExpired - -import pytorch_lightning - - -def call_training_script(module_file, cli_args, method, tmpdir, timeout=60, as_module=False): - file = Path(module_file.__file__).absolute() - cli_args = cli_args.split(" ") if cli_args else [] - cli_args += ["--tmpdir", str(tmpdir)] - cli_args += ["--trainer_method", method] - file_args = ["-m", module_file.__spec__.name] if as_module else [str(file)] - command = [sys.executable] + file_args + cli_args - - # need to set the PYTHONPATH in case pytorch_lightning was not installed into the environment - env = os.environ.copy() - env["PYTHONPATH"] = env.get("PYTHONPATH", "") + f"{pytorch_lightning.__file__}:" - - # for running in ddp mode, we need to launch it's own process or pytest will get stuck - p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) - try: - std, err = p.communicate(timeout=timeout) - err = str(err.decode("utf-8")) - if "Exception" in err: - raise Exception(err) - except TimeoutExpired: - p.kill() - std, err = p.communicate() - return std, err diff --git a/tests/tests_pytorch/utilities/test_data.py b/tests/tests_pytorch/utilities/test_data.py index ffb898efaa815..9b7abf0d90a88 100644 --- a/tests/tests_pytorch/utilities/test_data.py +++ b/tests/tests_pytorch/utilities/test_data.py @@ -1,3 +1,4 @@ +import random from dataclasses import dataclass import pytest @@ -12,9 +13,10 @@ from pytorch_lightning.utilities.data import ( _dataloader_init_kwargs_resolve_sampler, _get_dataloader_init_args_and_kwargs, - _replace_init_method, + _replace_dunder_methods, _replace_value_in_saved_args, _update_dataloader, + _WrapAttrTag, extract_batch_size, get_len, has_iterable_dataset, @@ -144,10 +146,10 @@ def __init__(self, foo, *args, **kwargs): super().__init__(foo, *args, **kwargs) dataloader = BadStandaloneGoodHookImpl([1, 2, 3]) - with pytest.raises(MisconfigurationException, match="`DataLoader` implementation has an error.*`dataset`"): + with pytest.raises(MisconfigurationException, match="implementation has an error.*`dataset`"): _update_dataloader(dataloader, dataloader.sampler) - with _replace_init_method(DataLoader, "dataset"): + with _replace_dunder_methods(DataLoader, "dataset"): dataloader = BadStandaloneGoodHookImpl([1, 2, 3]) new_dataloader = _update_dataloader(dataloader, dataloader.sampler) assert isinstance(new_dataloader, BadStandaloneGoodHookImpl) @@ -159,7 +161,7 @@ def __init__(self, randomize, *args, **kwargs): super().__init__(*args, shuffle=randomize, **kwargs) dataloader = BadImpl(False, []) - with pytest.raises(MisconfigurationException, match="`DataLoader` implementation has an error.*`shuffle`"): + with pytest.raises(MisconfigurationException, match="implementation has an error.*`shuffle`"): _update_dataloader(dataloader, dataloader.sampler) class GoodImpl(DataLoader): @@ -173,6 +175,35 @@ def __init__(self, randomize, *args, **kwargs): assert isinstance(new_dataloader, GoodImpl) +def test_replace_dunder_methods_multiple_loaders_without_init(): + """In case of a class, that inherits from a class that we are patching, but doesn't define its own `__init__` + method (the one we are wrapping), it can happen, that `hasattr(cls, "__old__init__")` is True because of parent + class, but it is impossible to delete, because that method is owned by parent class. 
Furthermore, the error + occurred only sometimes because it depends on the order in which we are iterating over a set of classes we are + patching. + + This test simulates the behavior by generating a sufficient number of dummy classes, which do not define `__init__` + and are children of `DataLoader`. We are testing that a) the context manager `_replace_dunder_methods` exits cleanly, and + b) the mechanism checking for the presence of `__old__init__` works as expected. + """ + classes = [DataLoader] + for i in range(100): + classes.append(type(f"DataLoader_{i}", (random.choice(classes),), {})) + + before = {cls: cls.__init__ for cls in classes} + + with _replace_dunder_methods(DataLoader, "dataset"): + for cls in classes[1:]: # First one is `DataLoader` + assert "__old__init__" not in cls.__dict__ + assert hasattr(cls, "__old__init__") + + assert "__old__init__" in DataLoader.__dict__ + assert hasattr(DataLoader, "__old__init__") + + for cls in classes: + assert before[cls] == cls.__init__ + + class DataLoaderSubclass1(DataLoader): def __init__(self, attribute1, *args, **kwargs): self.at1 = attribute1 @@ -298,8 +329,8 @@ def __init__(self, dataset, **kwargs): pytest.param(ChangingDataLoader, (range(5),), dict(), ("dataset",), list(range(10)), dict(), id="test9"), ], ) -def test_replace_init_method_dataloader(cls, args, kwargs, arg_names, dataset, checked_values): - with _replace_init_method(DataLoader, "dataset"): +def test_replace_dunder_methods_dataloader(cls, args, kwargs, arg_names, dataset, checked_values): + with _replace_dunder_methods(DataLoader, "dataset"): dataloader = cls(*args, **kwargs) assert dataloader.__pl_saved_args == args @@ -336,12 +367,12 @@ def test_replace_init_method_dataloader(cls, args, kwargs, arg_names, dataset, c assert dataloader_value == value -def test_replace_init_method_extra_kwargs(): +def test_replace_dunder_methods_extra_kwargs(): class LoaderSubclass(DataLoader): def __init__(self, dataset, *args, batch_size=10, **kwargs): super().__init__(dataset, *args, batch_size=batch_size, **kwargs) - with _replace_init_method(DataLoader, "dataset"): + with _replace_dunder_methods(DataLoader, "dataset"): dataloader = LoaderSubclass(range(10)) assert dataloader.__pl_saved_args == (range(10),) @@ -351,6 +382,90 @@ def __init__(self, dataset, *args, batch_size=10, **kwargs): assert dataloader.__dataset == range(10) + + +def test_replace_dunder_methods_attrs(): + """This test checks that all the calls from setting and deleting attributes within `_replace_dunder_methods` + are correctly preserved even after reinstantiation.
+ + It also includes a custom `__setattr__`. + """ + + class Loader(DataLoader): + def __setattr__(self, attr, val): + if attr == "custom_arg": + val = val + 2 + super().__setattr__(attr, val) + + with _replace_dunder_methods(DataLoader, "dataset"): + dataloader = Loader(range(10)) + dataloader.custom_arg = 5 + dataloader.my_arg = 10 + dataloader.another_arg = 100 + del dataloader.dataset + try: + del dataloader.abc_arg + except AttributeError: + pass + + assert dataloader.__pl_saved_args == (range(10),) + assert dataloader.__pl_saved_kwargs == {} + assert dataloader.__pl_saved_arg_names == ("dataset",) + assert dataloader.__dataset == range(10) + assert dataloader.custom_arg == 7 + assert dataloader.my_arg == 10 + assert dataloader.another_arg == 100 + assert not hasattr(dataloader, "dataset") + assert dataloader.__pl_attrs_record == [ + (("custom_arg", 5), _WrapAttrTag.SET), + (("my_arg", 10), _WrapAttrTag.SET), + (("another_arg", 100), _WrapAttrTag.SET), + (("dataset",), _WrapAttrTag.DEL), + ] + + dataloader = _update_dataloader(dataloader, dataloader.sampler) + assert dataloader.custom_arg == 7 + assert dataloader.my_arg == 10 + assert dataloader.another_arg == 100 + assert not hasattr(dataloader, "dataset") + + +def test_replace_dunder_methods_restore_methods(): + """This test checks whether all dunder methods are restored to their original versions.""" + + class Init(DataLoader): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + class SetAttr(DataLoader): + def __setattr__(self, *args): + return super().__setattr__(*args) + + class DelAttr(DataLoader): + def __delattr__(self, *args): + return super().__delattr__(*args) + + class InitAndSetAttr(Init, SetAttr): + pass + + class InitAndDelAttr(Init, DelAttr): + pass + + class SetAttrAndDelAttr(SetAttr, DelAttr): + pass + + class AllDunder(Init, SetAttr, DelAttr): + pass + + before = dict() + for cls in (Init, SetAttr, DelAttr, InitAndSetAttr, InitAndDelAttr, SetAttrAndDelAttr, AllDunder): + before[cls] = {"init": cls.__init__, "setattr": cls.__setattr__, "delattr": cls.__delattr__} + + with _replace_dunder_methods(DataLoader, "dataset"): + pass + + for cls in (Init, SetAttr, DelAttr, InitAndSetAttr, InitAndDelAttr, SetAttrAndDelAttr, AllDunder): + assert before[cls] == {"init": cls.__init__, "setattr": cls.__setattr__, "delattr": cls.__delattr__} + + @pytest.mark.parametrize("predicting", [True, False]) def test_custom_batch_sampler(predicting): """This test asserts, that custom `BatchSampler`, with all the arguments, that are required in order to @@ -367,8 +482,8 @@ def __init__(self, sampler, extra_arg, drop_last=True): super().__init__(sampler, 10, drop_last) sampler = RandomSampler(range(10)) - with _replace_init_method(BatchSampler): - # instantiate within `_replace_init_method` context manager, simulating `*_dataloader` hooks + with _replace_dunder_methods(BatchSampler): + # instantiate within `_replace_dunder_methods` context manager, simulating `*_dataloader` hooks + batch_sampler = MyBatchSampler(sampler, "random_str") dataloader = DataLoader(range(10), batch_sampler=batch_sampler) @@ -413,8 +528,8 @@ def __init__(self, sampler, extra_arg): super().__init__(sampler, 10, False) sampler = RandomSampler(range(10)) - with _replace_init_method(BatchSampler): - # instantiate within `_replace_init_method` context manager, simulating `*_dataloader` hooks + with _replace_dunder_methods(BatchSampler): + # instantiate within `_replace_dunder_methods` context manager, simulating `*_dataloader` hooks + batch_sampler = 
MyBatchSampler(sampler, "random_str") dataloader = DataLoader(range(10), batch_sampler=batch_sampler) @@ -440,8 +555,8 @@ def __init__(self, extra_arg): self.extra_arg = extra_arg super().__init__(RandomSampler(range(10)), 10, False) - with _replace_init_method(BatchSampler): - # instantiate within `_replace_init_method` context manager, simulating `*_dataloader` hooks + with _replace_dunder_methods(BatchSampler): + # instantiate within `_replace_dunder_methods` context manager, simulating `*_dataloader` hooks + batch_sampler = MyBatchSampler("random_str") dataloader = DataLoader(range(10), batch_sampler=batch_sampler) diff --git a/tests/tests_pytorch/utilities/test_dtype_device_mixin.py b/tests/tests_pytorch/utilities/test_dtype_device_mixin.py index 38f72b555d52d..7c17b3d9f7642 100644 --- a/tests/tests_pytorch/utilities/test_dtype_device_mixin.py +++ b/tests/tests_pytorch/utilities/test_dtype_device_mixin.py @@ -113,7 +113,7 @@ def test_submodules_multi_gpu_ddp_spawn(tmpdir): ], ) @RunIf(min_cuda_gpus=1) -def test_gpu_cuda_device(device): +def test_cuda_device(device): model = TopModule() model.cuda(device) @@ -122,3 +122,25 @@ def test_gpu_cuda_device(device): assert device.type == "cuda" assert device.index is not None assert device.index == torch.cuda.current_device() + + +@RunIf(min_cuda_gpus=2) +def test_cuda_current_device(): + """Test that calling .cuda() moves the model to the correct device and respects current cuda device setting.""" + + class CudaModule(DeviceDtypeModuleMixin): + def __init__(self): + super().__init__() + self.layer = nn.Linear(1, 1) + + model = CudaModule() + + torch.cuda.set_device(0) + model.cuda(1) + assert model.device == torch.device("cuda", 1) + assert model.layer.weight.device == torch.device("cuda", 1) + + torch.cuda.set_device(1) + model.cuda() # model is already on device 1, and calling .cuda() without device index should not move model + assert model.device == torch.device("cuda", 1) + assert model.layer.weight.device == torch.device("cuda", 1)