
fix pylint
JohanWork committed Jan 11, 2024
1 parent 4fa5578 commit cd2cda3
Showing 2 changed files with 4 additions and 44 deletions.
22 changes: 1 addition & 21 deletions tests/prompt_strategies/test_alpacha.py
@@ -3,12 +3,11 @@
 """
 import pytest
 from datasets import Dataset
-from tokenizers import AddedToken
-from transformers import AutoTokenizer

 from axolotl.datasets import TokenizedPromptDataset
 from axolotl.prompt_tokenizers import AlpacaPromptTokenizingStrategy
 from axolotl.prompters import AlpacaPrompter
+from utils import fixture_tokenizer


 @pytest.fixture(name="alpacha_dataset")
@@ -24,25 +23,6 @@ def fixture_alpacha_dataset():
     )


-@pytest.fixture(name="tokenizer")
-def fixture_tokenizer():
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
-    tokenizer.add_special_tokens(
-        {
-            "eos_token": AddedToken(
-                "<|im_end|>", rstrip=False, lstrip=False, normalized=False
-            )
-        }
-    )
-    tokenizer.add_tokens(
-        [
-            AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False),
-        ]
-    )
-
-    return tokenizer
-
-
 class TestAlpacha:
     """
     Test class for alpacha prompter
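Both test modules now pull the tokenizer fixture from a shared `utils` module instead of each defining its own copy, presumably to silence pylint's duplicate-code check (R0801). The shared module is not among the two changed files, so it must already exist in the tree. Below is a sketch of what it presumably contains, reassembled from the deleted lines above; the path `tests/prompt_strategies/utils.py` and the docstring are assumptions, while the fixture body is copied verbatim from the deletions:

"""
Shared fixture for the prompt strategy tests.
(Hypothetical reconstruction of tests/prompt_strategies/utils.py; not part of this diff.)
"""
import pytest
from tokenizers import AddedToken
from transformers import AutoTokenizer


@pytest.fixture(name="tokenizer")
def fixture_tokenizer():
    # ChatML-style tokens on top of the Mistral tokenizer, exactly as the
    # deleted per-file fixtures configured them.
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
    tokenizer.add_special_tokens(
        {
            "eos_token": AddedToken(
                "<|im_end|>", rstrip=False, lstrip=False, normalized=False
            )
        }
    )
    tokenizer.add_tokens(
        [
            AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False),
        ]
    )

    return tokenizer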
26 changes: 3 additions & 23 deletions tests/prompt_strategies/test_sharegpt.py
@@ -3,12 +3,11 @@
 """
 import pytest
 from datasets import Dataset
-from tokenizers import AddedToken
-from transformers import AutoTokenizer

 from axolotl.datasets import TokenizedPromptDataset
 from axolotl.prompt_strategies.sharegpt import SimpleShareGPTPromptTokenizingStrategy
 from axolotl.prompters import ShareGPTPrompterV2
+from utils import fixture_tokenizer


 @pytest.fixture(name="sharegpt_dataset")
@@ -43,25 +42,6 @@ def fixture_sharegpt_dataset():
     )


-@pytest.fixture(name="tokenizer")
-def fixture_tokenizer():
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
-    tokenizer.add_special_tokens(
-        {
-            "eos_token": AddedToken(
-                "<|im_end|>", rstrip=False, lstrip=False, normalized=False
-            )
-        }
-    )
-    tokenizer.add_tokens(
-        [
-            AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False),
-        ]
-    )
-
-    return tokenizer
-
-
 class TestSharegpt:
     """
     Test class for sharegpt prompter
@@ -96,7 +76,7 @@ def test_no_double_im_end(self, sharegpt_dataset, tokenizer):
         ]
         # fmt: on

-    def test_w_train_on_input(self, sharegpt_dataset, tokenizer):
+    def test_no_train_on_input(self, sharegpt_dataset, tokenizer):
         strategy = SimpleShareGPTPromptTokenizingStrategy(
             ShareGPTPrompterV2(
                 conversation="chatml",
@@ -124,7 +104,7 @@ def test_w_train_on_input(self, sharegpt_dataset, tokenizer):
         ]
         # fmt: on

-    def test_no_train_on_input(self, sharegpt_dataset, tokenizer):
+    def test_w_train_on_input(self, sharegpt_dataset, tokenizer):
         strategy = SimpleShareGPTPromptTokenizingStrategy(
             ShareGPTPrompterV2(
                 conversation="chatml",