From e7aa7b1a1e31849517bbbb502a0de0aef919580a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 13 Sep 2023 11:23:30 -0400
Subject: [PATCH] gracefully handle length feature used for group by (#565)

---
 src/axolotl/utils/dataloader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/axolotl/utils/dataloader.py b/src/axolotl/utils/dataloader.py
index 6d5505baa..d659c3d33 100644
--- a/src/axolotl/utils/dataloader.py
+++ b/src/axolotl/utils/dataloader.py
@@ -223,6 +223,8 @@ def __iter__(self):
                 concatenated = {}
                 batched_data = [self.dataset[batch_idx] for batch_idx in batch]
                 for feature in features:
+                    if feature == "length":
+                        continue
                     if feature == "attention_mask":
                         arrays = [
                             (attn_mask_cum_idx + idx + 1) * np.array(item[feature])