
Merge pull request #96 from huggingface/nouamane/docs
Quick refactors
NouamaneTazi authored Mar 4, 2024
2 parents 5e128fc + e74dd02 commit 5bb7168
Showing 7 changed files with 84 additions and 38 deletions.
39 changes: 33 additions & 6 deletions README.md
@@ -1,4 +1,34 @@
# ⚡️ Nanotron
<h1 align="center">⚡️ Nanotron</h1>

<p align="center">
<a href="https://github.com/huggingface/nanotron/releases">
<img alt="GitHub release" src="https://img.shields.io/github/release/huggingface/nanotron.svg">
</a>
<a href="https://arxiv.org/abs/2210.07316">
<img alt="GitHub release" src="https://img.shields.io/badge/arXiv-2305.14251-b31b1b.svg">
</a>
<a href="https://github.com/huggingface/nanotron/blob/master/LICENSE">
<img alt="License" src="https://img.shields.io/github/license/huggingface/nanotron.svg?color=green">
</a>
</p>

<h4 align="center">
<p>
<a href="#Philosophy">Philosophy</a> •
<a href="#Core-Features">Core Features</a> •
<a href="#Installation">Installation</a> •
<a href="#Quick-examples">Usage</a> •
<a href="#Development-guidelines">Contributions</a>
<p>
</h4>

<h3 align="center">
<a href="https://huggingface.co/nanotron"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png" /></a>
</h3>



#

The objective of this library is to provide easy distributed primitives in order to train a variety of models efficiently using 3D parallelism. For more information about the internal design of the library or 3D parallelism in general, please check out [[docs.md]](./docs/docs.md) and [[3d_parallelism.md]](./docs/3d_parallelism.md).

@@ -28,12 +58,10 @@ To install (in a new env):
```bash
pip install torch
pip install packaging; pip install "flash-attn>=2.5.0" --no-build-isolation
git clone git@github.com:huggingface/nanotron.git
cd nanotron
pip install -e .
pip install nanotron
```

Also nice to have `transformers` `datasets` `python-etcd` `tensorboardX`: `pip install transformers datasets python-etcd tensorboardX`
Also nice to have: `pip install transformers datasets python-etcd tensorboardX`

We also support a set of flavors that you can install using `pip install -e [$FLAVOR]`:
- `dev`: Used if you are developing in `nanotron`. In particular, it installs our linter mechanism. On top of that you have to run `pre-commit install` afterwards.
@@ -68,7 +96,6 @@ pre-commit run --config .pre-commit-config.yaml --all-files

Features we would like to add:
- [ ] Support `torch.compile`
- [ ] Support `torch.distributed.rpc`
- [ ] More optimized kernels
- [ ] Support Zero3
- [ ] Other PP schedules (such as Interleaved 1f1b...)
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "nanotron"
version = "0.2"
version = "0.4"
description = "Minimalistic Large Language Model Training and Finetuning"
authors = [
{name = "Nouamane Tazi", email="nouamane@huggingface.co"},
28 changes: 16 additions & 12 deletions run_generate.py
@@ -15,9 +15,19 @@
import torch
from nanotron import distributed as dist
from nanotron import logging
from nanotron.config import GenerationArgs, LoggingArgs, ParallelismArgs, get_config_from_file
from nanotron.generation.decode import GenerationInput, TokenizerConfig, decode_text, decode_tokenized
from nanotron.logging import log_rank, set_logger_verbosity_format
from nanotron.config import (
GenerationArgs,
LoggingArgs,
ParallelismArgs,
get_config_from_file,
)
from nanotron.generation.decode import (
GenerationInput,
TokenizerConfig,
decode_text,
decode_tokenized,
)
from nanotron.logging import log_rank, set_ranks_logging_level
from nanotron.models import build_model
from nanotron.parallel import ParallelContext
from nanotron.parallel.parameters import sanity_check
@@ -32,9 +42,7 @@
get_synced_random_state,
set_random_seed,
)
from nanotron.serialize import (
load_weights,
)
from nanotron.serialize import load_weights
from nanotron.trainer import CONFIG_TO_MODEL_CLASS, mark_tied_parameters

try:
@@ -86,12 +94,8 @@ def main():
log_level_replica="info",
)

if dist.get_rank(parallel_context.world_pg) == 0:
if logging_config.log_level is not None:
set_logger_verbosity_format(logging_config.log_level, parallel_context=parallel_context)
else:
if logging_config.log_level_replica is not None:
set_logger_verbosity_format(logging_config.log_level_replica, parallel_context=parallel_context)
# Set log levels
set_ranks_logging_level(parallel_context=parallel_context, logging_config=logging_config)

log_rank(f"model_config: {model_config}", logger=logger, level=logging.INFO, rank=0)
log_rank(f"tokenizer_path: {tokenizer_path}", logger=logger, level=logging.INFO, rank=0)
2 changes: 1 addition & 1 deletion src/nanotron/__init__.py
@@ -1 +1 @@
__version__ = "0.2"
__version__ = "0.4"
23 changes: 21 additions & 2 deletions src/nanotron/logging.py
@@ -18,13 +18,24 @@
import sys
from dataclasses import dataclass
from functools import lru_cache
from logging import CRITICAL, DEBUG, ERROR, FATAL, INFO, NOTSET, WARNING, Formatter, Logger
from logging import (
CRITICAL,
DEBUG,
ERROR,
FATAL,
INFO,
NOTSET,
WARNING,
Formatter,
Logger,
)
from typing import List, Optional, Union

import torch
from torch import distributed as torch_dist

from nanotron import distributed as dist
from nanotron.config.config import LoggingArgs
from nanotron.parallel import ParallelContext

log_levels = {
@@ -283,7 +294,6 @@ def set_logger_verbosity_format(logging_level: str, parallel_context: ParallelCo
f"TP={dist.get_rank(parallel_context.tp_pg)}{expert_parallel_log}{'|' + node_name if node_name else ''}]: %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
)
# TODO @thomasw21: `logging.log_levels` returns valid lg log levels
log_level = log_levels[logging_level]

# main root logger
@@ -299,4 +309,13 @@ def set_logger_verbosity_format(logging_level: str, parallel_context: ParallelCo
set_formatter(formatter=formatter)


def set_ranks_logging_level(parallel_context: ParallelContext, logging_config: LoggingArgs):
if dist.get_rank(parallel_context.world_pg) == 0:
if logging_config.log_level is not None:
set_logger_verbosity_format(logging_config.log_level, parallel_context=parallel_context)
else:
if logging_config.log_level_replica is not None:
set_logger_verbosity_format(logging_config.log_level_replica, parallel_context=parallel_context)


_configure_library_root_logger()
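
> Editor's note: for orientation, a minimal usage sketch of the new `set_ranks_logging_level` helper, which folds the rank-dependent branching previously duplicated in `run_generate.py` and `trainer.py` into one call. `configure_logging` is a hypothetical wrapper, the log levels are illustrative, and the `LoggingArgs` keyword fields are assumed from the attributes read in the diff above.

```python
from nanotron.config import LoggingArgs
from nanotron.logging import set_ranks_logging_level
from nanotron.parallel import ParallelContext


def configure_logging(parallel_context: ParallelContext) -> None:
    # Keyword names are assumed from the fields the helper reads
    # (`log_level`, `log_level_replica`); other LoggingArgs fields are left at their defaults.
    logging_config = LoggingArgs(log_level="info", log_level_replica="warning")

    # Rank 0 of the world process group is configured with `log_level`; every other
    # rank falls back to `log_level_replica`, matching the branching in the helper above.
    set_ranks_logging_level(parallel_context=parallel_context, logging_config=logging_config)
```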
5 changes: 5 additions & 0 deletions src/nanotron/parallel/pipeline_parallel/__init__.py
@@ -0,0 +1,5 @@
from nanotron.parallel.pipeline_parallel.engine import PipelineEngine
from nanotron.parallel.pipeline_parallel.tensor_pointer import TensorPointer
from nanotron.parallel.pipeline_parallel.utils import get_pp_rank_of

__all__ = ["PipelineEngine", "TensorPointer", "get_pp_rank_of"]
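
> Editor's note: the re-exports above shorten the import paths used elsewhere in this commit (see the `trainer.py` diff below). A small sketch of the before/after import, based only on lines appearing in this diff:

```python
# Before this commit, each name had to be imported from its own submodule:
#   from nanotron.parallel.pipeline_parallel.engine import PipelineEngine
#   from nanotron.parallel.pipeline_parallel.tensor_pointer import TensorPointer
#   from nanotron.parallel.pipeline_parallel.utils import get_pp_rank_of
# After it, the package root re-exports all three:
from nanotron.parallel.pipeline_parallel import (
    PipelineEngine,
    TensorPointer,
    get_pp_rank_of,
)
```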
23 changes: 7 additions & 16 deletions src/nanotron/trainer.py
@@ -45,7 +45,7 @@
human_format,
log_memory,
log_rank,
set_logger_verbosity_format,
set_ranks_logging_level,
)
from nanotron.models import NanotronModel, build_model
from nanotron.models.base import check_model_has_grad
@@ -55,9 +55,11 @@
from nanotron.parallel import ParallelContext
from nanotron.parallel.data_parallel.utils import sync_gradients_across_dp
from nanotron.parallel.parameters import NanotronParameter, sanity_check
from nanotron.parallel.pipeline_parallel.engine import PipelineEngine
from nanotron.parallel.pipeline_parallel.tensor_pointer import TensorPointer
from nanotron.parallel.pipeline_parallel.utils import get_pp_rank_of
from nanotron.parallel.pipeline_parallel import (
PipelineEngine,
TensorPointer,
get_pp_rank_of,
)
from nanotron.parallel.tensor_parallel.nn import (
TensorParallelLinearMode,
TensorParallelRowLinear,
@@ -143,14 +145,7 @@ def __init__(
self.pre_init()

# Set log levels
if dist.get_rank(self.parallel_context.world_pg) == 0:
if self.config.logging.log_level is not None:
set_logger_verbosity_format(self.config.logging.log_level, parallel_context=self.parallel_context)
else:
if self.config.logging.log_level_replica is not None:
set_logger_verbosity_format(
self.config.logging.log_level_replica, parallel_context=self.parallel_context
)
set_ranks_logging_level(parallel_context=self.parallel_context, logging_config=self.config.logging)

# Log benchmark info
if os.environ.get("NANOTRON_BENCHMARK", "0") == "1":
@@ -198,8 +193,6 @@ def __init__(
)

# Define iteration start state
self.start_iteration_step: int
self.consumed_train_samples: int
if self.init_checkpoint_path is not None:
checkpoint_metadata = load_meta(
parallel_context=self.parallel_context, root_folder=self.init_checkpoint_path
@@ -266,8 +259,6 @@ def train(
self.save_checkpoint()

if isinstance(dataloader_or_dls, tuple):
dataloader_or_dls[1] if len(dataloader_or_dls) > 1 else None
dataloader_or_dls[2] if len(dataloader_or_dls) > 2 else None
dataloader = dataloader_or_dls[0]
else:
dataloader = dataloader_or_dls
