
Commit

Addressed comments missed in PR #1 (#3)
* addressed comments missed in #1, except checkpointing

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>

* ruff + lint

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>

* removed gradient checkpointing

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>

* moved config file and commented on rope scaling.

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>

---------

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>
fabianlim authored Jun 22, 2024
1 parent 6d0760e commit fc30634
Showing 30 changed files with 133 additions and 374 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
@@ -58,7 +58,6 @@ include = [
     "instructlab.dolomite.hf_models.modeling_utils.normalization.layernorm",
     "instructlab.dolomite.hf_models.modeling_utils.normalization.rmsnorm",
     "instructlab.dolomite.hf_models.modeling_utils.position_embedding",
-    "instructlab.dolomite.gradient_checkpointing",
     "instructlab.dolomite.utils",
 ]

24 changes: 0 additions & 24 deletions src/instructlab/dolomite/gradient_checkpointing/__init__.py

This file was deleted.

47 changes: 0 additions & 47 deletions src/instructlab/dolomite/gradient_checkpointing/block.py

This file was deleted.

3 changes: 2 additions & 1 deletion src/instructlab/dolomite/hf_models/__init__.py
@@ -2,8 +2,9 @@
 # Extracted from https://github.com/ibm-granite/dolomite-engine
 # ----------------------------------------------------------------
 # Local
+from .config import GPTDolomiteConfig
 from .model_conversion import export_to_huggingface, import_from_huggingface
-from .models import GPTDolomiteConfig, GPTDolomiteForCausalLM, GPTDolomiteModel
+from .models import GPTDolomiteForCausalLM, GPTDolomiteModel
 from .register_hf import register_model_classes

 register_model_classes()
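With this commit, GPTDolomiteConfig is re-exported from hf_models/config.py instead of hf_models/models, so package-level imports stay the same for downstream code. A minimal usage sketch, assuming the installed instructlab-dolomite package and that the config constructor provides defaults (its full signature is truncated in this diff):

# Sketch only; relies on the re-exports visible in the hunk above.
from instructlab.dolomite.hf_models import (
    GPTDolomiteConfig,       # now defined in hf_models/config.py
    GPTDolomiteForCausalLM,  # still exported via hf_models/models
)

# register_model_classes() runs at import time (module-level call above);
# presumably it registers these classes with transformers' Auto* machinery.
config = GPTDolomiteConfig()   # assumes defaults; real runs set sizes explicitly
print(config.model_type)       # -> "gpt_dolomite"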
5 changes: 4 additions & 1 deletion src/instructlab/dolomite/hf_models/config.py
@@ -8,7 +8,8 @@
 from .enums import AttentionHeadType, PositionEmbeddingType


-class CommonConfig(PretrainedConfig):
+class GPTDolomiteConfig(PretrainedConfig):
+    model_type = "gpt_dolomite"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {
         "hidden_size": "n_embd",
@@ -19,6 +20,8 @@ class CommonConfig(PretrainedConfig):

     # NOTE: initializer range is kept for backward compatiblity
     # but it is not used anymore
+    # : also rope_scaling is not used anymore but kept for
+    # same reason.

     def __init__(
         self,
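For reference, PretrainedConfig's attribute_map is what lets the renamed config answer to both the HF-style name (hidden_size) and the GPT-2-style field (n_embd). A minimal, self-contained illustration; the class name, the n_embd default, and the constructor below are hypothetical, since the real __init__ is truncated here:

# Hypothetical mini-config demonstrating the attribute_map aliasing shown above.
from transformers import PretrainedConfig

class MiniDolomiteConfig(PretrainedConfig):
    model_type = "gpt_dolomite"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {"hidden_size": "n_embd"}

    def __init__(self, n_embd: int = 768, **kwargs) -> None:
        self.n_embd = n_embd
        super().__init__(**kwargs)

cfg = MiniDolomiteConfig(n_embd=2048)
assert cfg.hidden_size == cfg.n_embd == 2048  # hidden_size is an alias for n_embd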
4 changes: 0 additions & 4 deletions src/instructlab/dolomite/hf_models/defaults.py

This file was deleted.

@@ -8,8 +8,8 @@
 from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTBigCodeConfig

 # Local
+from ..config import GPTDolomiteConfig
 from ..enums import AttentionHeadType, PositionEmbeddingType
-from ..models import GPTDolomiteConfig


 def import_from_huggingface_bigcode(
@@ -6,12 +6,12 @@

 # Local
 from ...utils import SafeTensorsWeightsManager, download_repo
+from ..config import GPTDolomiteConfig
 from ..enums import AttentionHeadType
 from ..modeling_utils import (
     interleave_query_key_value_tensor_for_attention,
     split_query_key_value_tensor_for_attention,
 )
-from ..models import GPTDolomiteConfig
 from ..models.gpt_dolomite import (
     interleave_up_gate_tensor_for_mlp,
     split_up_gate_tensor_for_mlp,
@@ -14,7 +14,5 @@
     repeat_key_value,
     split_query_key_value_tensor_for_attention,
 )
-from .embedding import Embedding
-from .linear import Linear
 from .normalization import RMSNorm, get_normalization_function
-from .position_embedding import Alibi, RoPE, YaRNScaledRoPE, apply_rotary_pos_emb
+from .position_embedding import Alibi, RoPE, apply_rotary_pos_emb
@@ -9,7 +9,7 @@
 import torch

 # Local
-from ...config import CommonConfig
+from ...config import GPTDolomiteConfig
 from ...enums import AttentionHeadType
 from .base import Attention
 from .flash import FlashAttention2
@@ -48,7 +48,7 @@


 def get_attention_module(
-    config: CommonConfig,
+    config: GPTDolomiteConfig,
     causal: bool,
     attention_implementation: str,
     use_padding_free_transformer: bool,
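The body of get_attention_module is not shown in this hunk; judging from the imports above (Attention, FlashAttention2), it likely dispatches on attention_implementation. A hypothetical sketch, not the committed code, with the padding-free path omitted:

# Hypothetical dispatcher sketch; the real parameter list, key strings, and
# constructor signatures may differ. Attention and FlashAttention2 are the
# classes imported earlier in this file.
def get_attention_module_sketch(
    config,                               # GPTDolomiteConfig after this change
    causal: bool,
    attention_implementation: str,
    use_padding_free_transformer: bool,   # ignored in this sketch
    layer_idx: int = None,
):
    if attention_implementation == "flash_attention_2":   # assumed key
        return FlashAttention2(config, causal=causal, layer_idx=layer_idx)
    return Attention(config, causal=causal, layer_idx=layer_idx)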
@@ -5,21 +5,21 @@
 from typing import Tuple

 # Third Party
+from torch.nn import Linear # replaces ParameterizedLinear
 from transformers import DynamicCache
 import torch
 import torch.nn.functional as F

 # Local
-from ...config import CommonConfig
+from ...config import GPTDolomiteConfig
 from ...enums import AttentionHeadType, PositionEmbeddingType
-from ..linear import Linear
 from ..position_embedding import apply_rotary_pos_emb
 from .utils import repeat_key_value


 class Attention(torch.nn.Module):
     def __init__(
-        self, config: CommonConfig, causal: bool, layer_idx: int = None
+        self, config: GPTDolomiteConfig, causal: bool, layer_idx: int = None
     ) -> None:
         super().__init__()


This file was deleted.

8 changes: 0 additions & 8 deletions src/instructlab/dolomite/hf_models/modeling_utils/linear.py

This file was deleted.

@@ -5,8 +5,7 @@
 import torch

 # Local
-from .layernorm import get_layernorm
-from .rmsnorm import RMSNorm, get_rmsnorm
+from .norms import RMSNorm, get_layernorm, get_rmsnorm

 _NORMALIZATION_FUNCTIONS = {
     "layernorm": get_layernorm,
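_NORMALIZATION_FUNCTIONS maps a normalization name to its factory (get_layernorm is visible; the rest of the dict and the dispatch function are truncated in this hunk). The lookup presumably follows the same pattern as the factories in the new norms.py shown further down; a sketch with assumed parameter names:

# Sketch of the name -> factory dispatch; parameter names are assumptions.
def get_normalization_function_sketch(
    name: str,                    # e.g. "layernorm" or "rmsnorm"
    normalized_shape: int,
    eps: float = 1e-5,
    normalization_implementation: str = "torch",
) -> torch.nn.Module:
    if name in _NORMALIZATION_FUNCTIONS:
        return _NORMALIZATION_FUNCTIONS[name](
            normalized_shape,
            eps=eps,
            normalization_implementation=normalization_implementation,
        )
    raise ValueError(f"unexpected normalization function `{name}`")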

This file was deleted.

@@ -0,0 +1,81 @@
# ----------------------------------------------------------------
# Extracted from https://github.com/ibm-granite/dolomite-engine
# ----------------------------------------------------------------

# Standard
import numbers

# Third Party
import torch

# ---------------- LayerNorm ---------------

_LAYERNORM_MODULES = {
    "torch": torch.nn.LayerNorm,
}


def get_layernorm(
    normalized_shape: int,
    eps: float,
    normalization_implementation: str = "torch",
) -> torch.nn.LayerNorm:
    if normalization_implementation in _LAYERNORM_MODULES:
        return _LAYERNORM_MODULES[normalization_implementation](
            normalized_shape=normalized_shape, eps=eps
        )

    raise ValueError(
        f"unexpected `normalization_implementation` {normalization_implementation}"
    )


# --------------- RMS Norm ---------------
# ----------------------------------------------------------------
# Extracted from https://github.com/ibm-granite/dolomite-engine
# ----------------------------------------------------------------


class RMSNorm(torch.nn.Module):
    def __init__(self, normalized_shape: int, eps: float = 1e-6) -> None:
        super().__init__()

        self.weight = torch.nn.Parameter(torch.ones(normalized_shape))
        self.eps = eps

        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = normalized_shape

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        input_dtype = input.dtype

        input = input.to(torch.float32)
        variance = input.pow(2).mean(-1, keepdim=True)
        input = input * torch.rsqrt(variance + self.eps)

        return self.weight * input.to(input_dtype)

    def extra_repr(self) -> str:
        return f"{self.normalized_shape}, eps={self.eps}"

    def reset_parameters(self) -> None:
        torch.nn.init.ones_(self.weight)


_RMSNORM_MODULES = {"torch": RMSNorm}


def get_rmsnorm(
    normalized_shape: int,
    eps: float,
    normalization_implementation: str = "torch",
) -> torch.nn.LayerNorm:
    if normalization_implementation in _RMSNORM_MODULES:
        return _RMSNORM_MODULES[normalization_implementation](
            normalized_shape=normalized_shape, eps=eps
        )

    raise ValueError(
        f"unexpected `normalization_implementation` {normalization_implementation}"
    )
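The new norms.py consolidates the former layernorm and rmsnorm modules into one file (matching the import change earlier in this diff). A short usage sketch against the code above, illustrative only and not part of the commit:

import torch

hidden = torch.randn(2, 5, 64)       # (batch, seq_len, hidden_size); shapes are illustrative

ln = get_layernorm(64, eps=1e-5)     # plain torch.nn.LayerNorm
rms = get_rmsnorm(64, eps=1e-6)      # the RMSNorm module defined above

assert ln(hidden).shape == rms(hidden).shape == hidden.shape

rms.reset_parameters()               # resets the RMSNorm scale weights to ones
print(rms)                           # extra_repr() -> "(64,), eps=1e-06"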

This file was deleted.

This file was deleted.

@@ -3,4 +3,4 @@
 # ----------------------------------------------------------------
 # Local
 from .alibi import Alibi
-from .rope import RoPE, YaRNScaledRoPE, apply_rotary_pos_emb
+from .rope import RoPE, apply_rotary_pos_emb