From 61ca63fd79454e6b3baf39f127b0fdcbb600df5f Mon Sep 17 00:00:00 2001 From: Felix Yan Date: Tue, 26 Sep 2023 22:19:17 +0300 Subject: [PATCH] Correct typos in datasets.py --- src/axolotl/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py index 1dccb6d52..32b2e0cc2 100644 --- a/src/axolotl/datasets.py +++ b/src/axolotl/datasets.py @@ -22,7 +22,7 @@ class TokenizedPromptDataset(Dataset): """ Dataset that returns tokenized prompts from a stream of text files. Args: - prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for proccessing the data. + prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data. dataset (dataset.Dataset): Dataset with text files. """ @@ -55,7 +55,7 @@ class ConstantLengthDataset(IterableDataset): """ Iterable dataset that returns constant length chunks of tokens from stream of text files. Args: - tokenizer (Tokenizer): The processor used for proccessing the data. + tokenizer (Tokenizer): The processor used for processing the data. dataset (dataset.Dataset): Dataset with text files. seq_length (int): Length of token sequences to return. """