replace test dataset with validation dataset when do_eval. (#800)
lkk12014402 committed Nov 28, 2023
1 parent 35d1b13 · commit e764bb5
Showing 2 changed files with 4 additions and 9 deletions.
10 changes: 4 additions & 6 deletions intel_extension_for_transformers/llm/finetuning/finetuning.py
@@ -445,12 +445,10 @@ def concatenate_data(dataset, max_seq_length):
                 )
 
             if training_args.do_eval:
-                if "test" not in tokenized_datasets:
-                    self.logger.info('Splitting train dataset in train and validation according to `eval_dataset_size`')
-                    tokenized_datasets = tokenized_datasets["train"].train_test_split(
-                        test_size=data_args.eval_dataset_size, shuffle=True, seed=42
-                    )
-                eval_dataset = tokenized_datasets["test"]
+                if "validation" not in tokenized_datasets:
+                    raise ValueError("--do_eval requires a validation dataset")
+
+                eval_dataset = tokenized_datasets["validation"]
                 if data_args.max_eval_samples is not None:
                     eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))
 
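After this change, running fine-tuning with --do_eval requires the tokenized dataset to already contain a "validation" split; the previous fallback of silently carving one out of the train split with train_test_split is gone. A minimal sketch of how a caller could prepare such a split up front, assuming the Hugging Face datasets API (the file name and split size below are placeholders):

    from datasets import load_dataset

    # Load a training file and carve out a held-out split ourselves,
    # since finetuning.py no longer does this automatically.
    raw = load_dataset("json", data_files={"train": "train.json"})
    splits = raw["train"].train_test_split(test_size=500, shuffle=True, seed=42)
    raw["train"] = splits["train"]
    # train_test_split names the held-out part "test"; expose it as "validation"
    # so the `"validation" not in tokenized_datasets` check passes.
    raw["validation"] = splits["test"]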
3 changes: 0 additions & 3 deletions intel_extension_for_transformers/neural_chat/config.py
@@ -228,9 +228,6 @@ class DataArguments:
             )
         },
     )
-    eval_dataset_size: int = field(
-        default=500, metadata={"help": "Size of validation dataset."}
-    )
     streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
     preprocessing_num_workers: Optional[int] = field(
         default=None,
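Removing eval_dataset_size from DataArguments also affects command lines. A small sketch of the downstream effect, assuming these dataclasses are parsed with transformers.HfArgumentParser as in typical fine-tuning scripts (that parsing path is an assumption, not shown in this diff):

    from transformers import HfArgumentParser
    from intel_extension_for_transformers.neural_chat.config import DataArguments

    # Assumption: DataArguments is consumed through HfArgumentParser.
    # With eval_dataset_size removed, a command line that still passes
    # --eval_dataset_size 500 will be rejected as an unrecognized argument;
    # the validation split size is now chosen by whoever builds the dataset.
    parser = HfArgumentParser(DataArguments)
    (data_args,) = parser.parse_args_into_dataclasses()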
