Fix merge: remove leftover conflict markers

vlad-karp · vlad-karp · commit 699ca4f25bec · 2026-03-20T23:37:57.000Z
diff --git a/src/maxtext/trainers/post_train/distillation/distillation_utils.py b/src/maxtext/trainers/post_train/distillation/distillation_utils.py
@@ -18,14 +18,11 @@
 model structures with Tunix's training interfaces.
 """
 
-<<<<<<< Updated upstream
 import pickle
 import tensorflow as tf
 from array_record.python import array_record_module
 
-=======
 import abc
->>>>>>> Stashed changes
 from typing import Any, Iterator, Optional, List, Callable
 
 import flax
diff --git a/src/maxtext/trainers/post_train/distillation/train_distill.py b/src/maxtext/trainers/post_train/distillation/train_distill.py
@@ -530,7 +530,6 @@ def train_distill(
   raw_train_iter = _setup_and_restore_input_pipeline(trainer, raw_train_iter, student_config, train_config)
 
   # 8. Configure Input Mapping
-<<<<<<< Updated upstream
   def custom_gen_model_input_fn(batch):
     inputs_dict = {
         "input_tokens": batch.input_tokens,
@@ -560,20 +559,6 @@ def custom_gen_model_input_fn(batch):
     return inputs_dict
 
   trainer = trainer.with_gen_model_input_fn(custom_gen_model_input_fn)
-=======
-  trainer = trainer.with_gen_model_input_fn(
-      lambda batch: {
-          "input_tokens": batch.input_tokens,
-          "positions": batch.positions,
-          "attention_mask": batch.input_mask,
-          "decoder_segment_ids": batch.decoder_segment_ids,
-          "targets": batch.targets,  # Passed to strategy (create_labels)
-          "targets_position": batch.targets_position,  # Passed to strategy (create_labels)
-          "targets_segmentation": batch.targets_segmentation,  # Passed to strategy (create_labels)
-          "cache": None,
-      }
-  )
->>>>>>> Stashed changes
 
   # 9. Create Iterator Wrappers (Use Utils)
   train_iter = distillation_utils.MaxTextToTunixIterator(raw_train_iter)
diff --git a/tests/post_training/unit/train_distill_test.py b/tests/post_training/unit/train_distill_test.py
@@ -359,12 +359,10 @@ def test_monitored_strategy_sft(self):
 
   def _test_monitored_strategy(self, sft_mode: bool):
     """Verifies the strategy calculates metrics and returns the correct tuple."""
-    mock_config = mock.Mock()
-    mock_config.vocab_size = 4
     strategy = distillation_utils.CombinedDistillationStrategy(
         student_forward_fn=lambda m, **k: None,
         teacher_forward_fn=lambda m, **k: None,
-        vocab_size=mock_config.vocab_size,
+        vocab_size=4,
         temperature=1.0,
         alpha=0.5,
         beta_feature=1.0,
@@ -413,12 +411,10 @@ def _test_monitored_strategy(self, sft_mode: bool):
 
   def verify_strategy_compute_eval_loss(self):
     """Covers MonitoredLogitStrategy.compute_eval_loss."""
-    mock_config = mock.Mock()
-    mock_config.vocab_size = 4
     strategy = distillation_utils.CombinedDistillationStrategy(
         student_forward_fn=mock.Mock(),
         teacher_forward_fn=mock.Mock(),
-        vocab_size=mock_config.vocab_size,
+        vocab_size=4,
         # student_config=mock_config,
         temperature=1.0,
         alpha=0.5,