From bb115a691dae3522def65f19196e8f996978cb4f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 13 Sep 2023 11:17:32 -0400
Subject: [PATCH] gracefully handle length feature used for group by

---
 src/axolotl/utils/dataloader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/axolotl/utils/dataloader.py b/src/axolotl/utils/dataloader.py
index 6d5505baa..d659c3d33 100644
--- a/src/axolotl/utils/dataloader.py
+++ b/src/axolotl/utils/dataloader.py
@@ -223,6 +223,8 @@ def __iter__(self):
                 concatenated = {}
                 batched_data = [self.dataset[batch_idx] for batch_idx in batch]
                 for feature in features:
+                    if feature == "length":
+                        continue
                     if feature == "attention_mask":
                         arrays = [
                             (attn_mask_cum_idx + idx + 1) * np.array(item[feature])