axolotl-ai-cloud · winglian · Sep 18, 2023 · Sep 18, 2023 · NanoCode012 · Sep 18, 2023
diff --git a/src/axolotl/utils/tokenization.py b/src/axolotl/utils/tokenization.py
@@ -18,21 +18,16 @@ def check_example_labels(example, tokenizer, text_only=False):
     # Get the input_ids, labels, and attention_mask from the dataset
     input_ids = example["input_ids"]
     labels = example["labels"]
-    attention_mask = example["attention_mask"]
 
     # You can compare the input_ids and labels element-wise
     # Remember to ignore positions with IGNORE_TOKEN_ID (if you use it) or attention_mask equal to 0
     colored_tokens = []
-    for _, (input_id, label_id, mask) in enumerate(
-        zip(input_ids, labels, attention_mask)
-    ):
+    for _, (input_id, label_id) in enumerate(zip(input_ids, labels)):
         decoded_input_token = tokenizer.decode(input_id)
         # Choose the color based on whether the label has the ignore value or not
         color = "red" if label_id == -100 else ("yellow" if label_id == 0 else "green")
         colored_token = colored(decoded_input_token, color) + (
-            not text_only
-            and colored(f"({label_id}, {mask}, {input_id})", "white")
-            or ""
+            not text_only and colored(f"({label_id}, {input_id})", "white") or ""
         )
         colored_tokens.append(colored_token)
 

diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
@@ -429,7 +429,7 @@ def calculate_total_num_steps(cfg, train_dataset, tokenizer):
                 .apply(lambda x: len(x))  # pylint: disable=unnecessary-lambda
                 .values
             )
-            LOG.info(f"📝 UPDATE CONFIG WITH: `total_num_tokens: {total_num_tokens}`")
+            LOG.info(f"total_num_tokens: {total_num_tokens}")
             cfg.total_num_tokens = total_num_tokens
 
         if not cfg.total_supervised_tokens:
@@ -489,6 +489,8 @@ def calculate_total_num_steps(cfg, train_dataset, tokenizer):
             data_loader_len = data_loader.len_w_stats()
             actual_eff = data_loader.efficiency()
             LOG.info(f"data_loader_len: {data_loader_len}")
+            # FIXME: is there a bug here somewhere? the total num steps depends
+            # on the agreed on value for sample_packing_eff_est
             total_num_steps = int(math.floor(data_loader_len * cfg.num_epochs))
 
             def calc_sample_packing_eff_est(estimates: List[float]):
@@ -502,10 +504,8 @@ def calc_sample_packing_eff_est(estimates: List[float]):
             sample_packing_eff_est = (
                 math.ceil(sample_packing_actual_eff_all * 100.0) / 100.0
             )
-            LOG.info(
-                f"📝 UPDATE CONFIG WITH: `sample_packing_eff_est: {sample_packing_eff_est}`"
-            )
             cfg.sample_packing_eff_est = sample_packing_eff_est
+            LOG.info(f"sample_packing_eff_est: {cfg.sample_packing_eff_est}")
     else:
         total_num_steps = int(
             math.ceil(len(train_dataset) * cfg.num_epochs / cfg.batch_size)