
Commit

Merge branch 'main' of https://github.com/huggingface/nanotron into nouamane/docs
NouamaneTazi committed Mar 4, 2024
2 parents 1fcc6b5 + 5e128fc commit e74dd02
Showing 4 changed files with 12 additions and 6 deletions.
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -19,6 +19,14 @@ repos:
         args:
           - --fix
           - --exit-non-zero-on-fix
+  # - repo: https://github.com/PyCQA/isort
+  #   rev: 5.12.0
+  #   hooks:
+  #     - id: isort
+  #       args:
+  #         - --profile=black
+  #         - --skip-glob=wandb/**/*
+  #         - --thirdparty=wandb
   - repo: https://github.com/codespell-project/codespell
     rev: v2.1.0
     hooks:
5 changes: 2 additions & 3 deletions examples/mamba/mamba.py
@@ -24,9 +24,6 @@
 import torch.nn.functional as F
 from config import MambaModelConfig
 from einops import rearrange, repeat
-from selective_scan_interface import mamba_inner_fn, selective_scan_fn
-from torch.nn import init
-
 from nanotron import distributed as dist
 from nanotron import logging
 from nanotron.config import ParallelismArgs
@@ -46,6 +43,8 @@
     TensorParallelRowLinear,
 )
 from nanotron.random import RandomStates
+from selective_scan_interface import mamba_inner_fn, selective_scan_fn
+from torch.nn import init
 
 try:
     from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
1 change: 1 addition & 0 deletions src/nanotron/helpers.py
@@ -167,6 +167,7 @@ def basic_optimizer_builder(named_param_groups):
         named_params_or_groups=named_param_groups,
         optimizer_builder=lambda param_groups: AdamW(  # pylint: disable=E0601
             param_groups,
+            weight_decay=optimizer_args.weight_decay,
             lr=optimizer_args.learning_rate_scheduler.learning_rate,
             eps=optimizer_args.adam_eps,
             betas=(optimizer_args.adam_beta1, optimizer_args.adam_beta2),
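For reference, the helpers.py change forwards the configured weight decay into torch's AdamW, which otherwise falls back to its default of 0.01. A minimal standalone sketch of the builder call (the function name and free-standing signature are hypothetical; the optimizer_args fields are the ones shown in the diff):

from torch.optim import AdamW

def build_adamw(param_groups, optimizer_args):
    # Same call as the lambda in basic_optimizer_builder, now with
    # weight_decay taken from the optimizer config rather than torch's default.
    return AdamW(
        param_groups,
        lr=optimizer_args.learning_rate_scheduler.learning_rate,
        weight_decay=optimizer_args.weight_decay,
        eps=optimizer_args.adam_eps,
        betas=(optimizer_args.adam_beta1, optimizer_args.adam_beta2),
    )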
4 changes: 1 addition & 3 deletions src/nanotron/trainer.py
@@ -766,9 +766,7 @@ def _mark_tied_parameters(
         parallel_context: ParallelContext,
         parallel_config: Optional[ParallelismArgs] = None,
     ):
-        mark_tied_parameters(
-            model=self.model, parallel_context=self.parallel_context, parallel_config=self.config.parallelism
-        )
+        mark_tied_parameters(model=model, parallel_context=parallel_context, parallel_config=parallel_config)
 
 
 def mark_tied_parameters(
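The trainer.py hunk is a bug fix: _mark_tied_parameters previously ignored the model, parallel_context, and parallel_config arguments it received and always re-read self.model, self.parallel_context, and self.config.parallelism. A minimal sketch of the corrected method, assuming the signature shown in the diff (the type hint on model is elided here):

    def _mark_tied_parameters(
        self,
        model,
        parallel_context: ParallelContext,
        parallel_config: Optional[ParallelismArgs] = None,
    ):
        # Forward exactly what the caller passed in instead of falling back
        # to the trainer's own attributes.
        mark_tied_parameters(model=model, parallel_context=parallel_context, parallel_config=parallel_config)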
