Revert "Torch2 (mosaicml#177) (mosaicml#178)" (mosaicml#181)
This reverts commit bb7f8bb.
dakinggg authored May 20, 2023 · 1 parent bb7f8bb · commit 89f56d2

Showing 12 changed files with 23 additions and 869 deletions.

.github/workflows/pr-cpu.yaml (2 additions, 6 deletions)

```diff
@@ -19,12 +19,8 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: 'cpu-latest'
-          container: mosaicml/pytorch:latest # mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
-          markers: 'not gpu'
-          pytest_command: 'coverage run -m pytest'
-        - name: 'cpu-2.0.1'
-          container: mosaicml/pytorch:2.0.1_cu117-python3.10-ubuntu20.04
+        - name: 'cpu'
+          container: mosaicml/pytorch:latest
           markers: 'not gpu'
           pytest_command: 'coverage run -m pytest'
     name: ${{ matrix.name }}
```

.github/workflows/pr-gpu.yaml (2 additions, 6 deletions)

```diff
@@ -19,12 +19,8 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: 'gpu-latest'
-          container: mosaicml/pytorch:latest # mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
-          markers: 'gpu'
-          pytest_command: 'coverage run -m pytest'
-        - name: 'gpu-2.0.1'
-          container: mosaicml/pytorch:2.0.1_cu117-python3.10-ubuntu20.04
+        - name: 'gpu'
+          container: mosaicml/pytorch:latest
           markers: 'gpu'
           pytest_command: 'coverage run -m pytest'
     name: ${{ matrix.name }}
```

.github/workflows/release.yaml (3 additions, 4 deletions)

```diff
@@ -32,11 +32,10 @@ jobs:
            PYPI_PACKAGE_NAME="llm-foundry-test-$(date +%Y%m%d%H%M%S)"
          fi

-         # Remove the xentropy-cuda-lib and triton-pre-mlir dependencies as PyPI does not support
-         # direct installs. The error message for importing FusedCrossEntropy gives instructions
-         # on how to install if a user tries to use it without this dependency.
+         # Remove the xentropy-cuda-lib dependency as PyPI does not support direct installs. The
+         # error message for importing FusedCrossEntropy gives instructions on how to install if a
+         # user tries to use it without this dependency.
          sed '/xentropy-cuda-lib@git+https:\/\/github.com\/HazyResearch\/flash-attention.git@.*/d' -i setup.py
-         sed '/triton-pre-mlir@git+https:\/\/github.com\/vchiley\/triton.git@.*/d' -i setup.py

          python -m pip install --upgrade build twine
          python -m build
```

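The sed lines strip direct-reference dependencies from setup.py before the package is built, because PyPI rejects uploads whose requirements point straight at a git URL. A hedged, minimal sketch of the kind of entry being removed (the package name matches the sed pattern above; the project name, version, and pinned ref are hypothetical, since the pattern elides them):

```python
# sketch_setup.py: illustrative only, not the actual llm-foundry setup.py.
# PyPI rejects distributions whose dependencies use PEP 508 direct
# references ("pkg@git+https://..."), which is why the release workflow
# deletes such lines with sed before running `python -m build`.
from setuptools import setup

setup(
    name='llm-foundry-example',  # hypothetical name for illustration
    version='0.0.1',
    install_requires=[
        'torch',  # ordinary index-resolved dependencies are fine
        # Direct git reference: works for local/source installs, but a
        # package containing it cannot be uploaded to PyPI. SOME_REF
        # stands in for the pinned ref, which the sed pattern elides.
        'xentropy-cuda-lib@git+https://github.com/HazyResearch/flash-attention.git@SOME_REF',
    ],
)
```
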
.pre-commit-config.yaml (0 additions, 1 deletion)

```diff
@@ -1,6 +1,5 @@
 default_language_version:
   python: python3
-exclude: llmfoundry/models/layers/flash_attn_triton.py
 repos:
 - repo: https://github.com/google/yapf
   rev: v0.32.0
```

README.md (0 additions, 2 deletions)

```diff
@@ -76,8 +76,6 @@ Here's what you need to get started with our LLM stack:

 # Installation

-This assumes you already have PyTorch and CMake installed.
-
 To get started, clone this repo and install the requirements:

 <!--pytest.mark.skip-->
```

llmfoundry/models/layers/__init__.py (0 additions, 2 deletions)

```diff
@@ -1,7 +1,6 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0

-from llmfoundry.models.layers import flash_attn_triton
 from llmfoundry.models.layers.attention import (
     ATTN_CLASS_REGISTRY, MultiheadAttention, MultiQueryAttention,
     attn_bias_shape, build_alibi_bias, build_attn_bias, flash_attn_fn,
@@ -10,7 +9,6 @@
 from llmfoundry.models.layers.norm import NORM_CLASS_REGISTRY, LPLayerNorm

 __all__ = [
-    'flash_attn_triton',
     'scaled_multihead_dot_product_attention',
     'flash_attn_fn',
     'triton_flash_attn_fn',
```

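Removing both the submodule import and its `__all__` entry takes flash_attn_triton out of the package's public surface in one step. A hedged sketch of this re-export pattern, with hypothetical module and function names rather than llm-foundry's actual ones:

```python
# mypkg/__init__.py: illustrative only; 'mypkg' and its submodule are
# hypothetical. `from mypkg import *` binds exactly the names listed in
# __all__, so deleting an entry (plus its import) removes that name from
# the advertised public API without touching the implementation.
from mypkg.attention import flash_attn_fn, triton_flash_attn_fn

__all__ = [
    'flash_attn_fn',
    'triton_flash_attn_fn',
]
```
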
llmfoundry/models/layers/attention.py (4 additions, 6 deletions)

```diff
@@ -207,13 +207,11 @@ def triton_flash_attn_fn(
     multiquery=False,
 ):
     try:
-        from llmfoundry.models.layers import flash_attn_triton  # type: ignore
+        from flash_attn import flash_attn_triton  # type: ignore
     except:
-        raise ValueError(
-            'Requirements for `attn_impl: triton` not installed. Either (1) have a CUDA-compatible GPU '
-            'and `pip install .[gpu]` if installing from source or `pip install triton-pre-mlir@git+https://github.com/vchiley/triton.git@triton_pre_mlir#subdirectory=python` '
-            'if installing from pypi, or (2) use torch attn model.attn_config.attn_impl=torch (torch attn_impl will be slow). '
-            'Note: (1) requires you have CMake and PyTorch already installed.')
+        raise RuntimeError(
+            'Please install flash-attn==1.0.3.post0 and triton==2.0.0.dev20221202'
+        )

     check_valid_inputs(query, key, value)
```

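Both the removed and restored branches use the same optional-dependency pattern: defer the import until the Triton path is actually invoked, and convert an import failure into an actionable error naming the exact fix. A self-contained sketch of the pattern (with an explicit ImportError catch rather than the bare except above; the wrapper name is hypothetical, and it assumes flash-attn 1.x exposes flash_attn_triton.flash_attn_func):

```python
def run_triton_flash_attn(*args, **kwargs):
    # Import lazily so the base package stays usable without the
    # optional GPU dependencies installed.
    try:
        from flash_attn import flash_attn_triton  # type: ignore
    except ImportError as e:
        # Surface an actionable message naming exact versions, mirroring
        # the RuntimeError raised in the diff above.
        raise RuntimeError(
            'Please install flash-attn==1.0.3.post0 and '
            'triton==2.0.0.dev20221202') from e
    # flash-attn 1.x exposes the Triton kernel as flash_attn_func.
    return flash_attn_triton.flash_attn_func(*args, **kwargs)
```
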