Merge pull request #3877 from AI-Hypercomputer:bvandermoon-remediate-rce-deserialization

Google-ML-Automation · Google-ML-Automation · commit 8cc5df1d3f1c · 2026-05-12T14:57:01.000-07:00
PiperOrigin-RevId: 914493298
diff --git a/src/maxtext/checkpoint_conversion/standalone_scripts/llama4_ckpt_unscanned.py b/src/maxtext/checkpoint_conversion/standalone_scripts/llama4_ckpt_unscanned.py
@@ -600,9 +600,8 @@ def _convert_pytorch_to_jax_weights(base_model_path: str, model_size: str, model
   for i, ckpt_path in enumerate(ckpt_paths):
     max_logging.log(f"Loading checkpoint {i+1} of {len(ckpt_paths)} ...")
     # NOTE: starting in PT2.6, `weights_only` was switched from the default of `False` to `True`
-    # thus we need to specify this or else loading will fail
     chkpt_vars[int(ckpt_path.name.split(".", maxsplit=2)[1])] = torch.load(
-        ckpt_path, map_location="cpu", weights_only=False
+        ckpt_path, map_location="cpu", weights_only=True
     )
   chkpt_vars = [chkpt_vars[i] for i in sorted(list(chkpt_vars.keys()))]
   # map weight names if they use HuggingFace instead of PyTorch convention
diff --git a/src/maxtext/checkpoint_conversion/standalone_scripts/llama_ckpt_conversion_inference_only.py b/src/maxtext/checkpoint_conversion/standalone_scripts/llama_ckpt_conversion_inference_only.py
@@ -157,7 +157,7 @@ def convert(base_model_path, maxtext_model_path, model_size):
   for i, ckpt_path in enumerate(ckpt_paths):
     print(f"Loading checkpoint {i+1} of {len(ckpt_paths)} ...")
 
-    checkpoint = torch.load(ckpt_path, map_location="cpu")
+    checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=True)
     pytorch_vars[int(ckpt_path.name.split(".", maxsplit=2)[1])] = checkpoint
     print("memory usage in GB: ", psutil.Process().memory_info().rss / (1024 * 1024))
 
diff --git a/src/maxtext/checkpoint_conversion/standalone_scripts/llama_or_mistral_ckpt.py b/src/maxtext/checkpoint_conversion/standalone_scripts/llama_or_mistral_ckpt.py
@@ -428,7 +428,7 @@ def convert_lora_weights_to_jax_weights(lora_config: dict, model_size: str):
 
   max_logging.log(f"Loading the lora  model from {lora_config['lora_model_path']}")
   # Load LoRA model weights
-  lora_chkpt_vars = torch.load(lora_config["lora_model_path"])
+  lora_chkpt_vars = torch.load(lora_config["lora_model_path"], weights_only=True)
   lora_chkpt_vars = _NamespaceMapper(lora_chkpt_vars)
 
   jax_weights_lora = {
@@ -1112,9 +1112,8 @@ def _convert_pytorch_to_jax_weights(base_model_path: str, model_size: str, model
   for i, ckpt_path in enumerate(ckpt_paths):
     max_logging.log(f"Loading checkpoint {i+1} of {len(ckpt_paths)} ...")
     # NOTE: starting in PT2.6, `weights_only` was switched from the default of `False` to `True`
-    # thus we need to specify this or else loading will fail
     chkpt_vars[int(ckpt_path.name.split(".", maxsplit=2)[1])] = torch.load(
-        ckpt_path, map_location="cpu", weights_only=False
+        ckpt_path, map_location="cpu", weights_only=True
     )
   chkpt_vars = [chkpt_vars[i] for i in sorted(list(chkpt_vars.keys()))]
   # map weight names if they use HuggingFace instead of PyTorch convention
diff --git a/src/maxtext/inference/mlperf/evaluate-accuracy-fast.py b/src/maxtext/inference/mlperf/evaluate-accuracy-fast.py
@@ -19,6 +19,7 @@
 import json
 import nltk
 import numpy as np
+import os
 import pandas as pd
 import tqdm
 
@@ -74,7 +75,19 @@ def get_args():
 
 
 def get_groundtruth(processed_dataset_file):
-  data = pd.read_pickle(processed_dataset_file)
+  """Return the "output" column of a .parquet/.csv/.json/.jsonl dataset; raise ValueError for other formats."""
+  ext = os.path.splitext(processed_dataset_file)[1].lower()
+  if ext == ".parquet":
+    data = pd.read_parquet(processed_dataset_file)
+  elif ext == ".csv":
+    data = pd.read_csv(processed_dataset_file)
+  elif ext in (".json", ".jsonl"):
+    data = pd.read_json(processed_dataset_file, lines=(ext == ".jsonl"))  # JSONL holds one record per line
+  else:
+    raise ValueError(
+        f"Unsupported dataset file format: {processed_dataset_file}. "
+        "Please use safe formats like Parquet (.parquet), CSV (.csv), or JSON/JSONL (.json/.jsonl)."
+    )
   return data["output"]
 
 
diff --git a/src/maxtext/inference/mlperf/evaluate-accuracy.py b/src/maxtext/inference/mlperf/evaluate-accuracy.py
@@ -24,6 +24,7 @@
 
 import numpy as np
 
+import os
 import pandas as pd
 
 
@@ -39,7 +40,19 @@ def get_args():
 
 
 def get_groundtruth(processed_dataset_file):
-  data = pd.read_pickle(processed_dataset_file)
+  """Return the "output" column of a .parquet/.csv/.json/.jsonl dataset; raise ValueError for other formats."""
+  ext = os.path.splitext(processed_dataset_file)[1].lower()
+  if ext == ".parquet":
+    data = pd.read_parquet(processed_dataset_file)
+  elif ext == ".csv":
+    data = pd.read_csv(processed_dataset_file)
+  elif ext in (".json", ".jsonl"):
+    data = pd.read_json(processed_dataset_file, lines=(ext == ".jsonl"))  # JSONL holds one record per line
+  else:
+    raise ValueError(
+        f"Unsupported dataset file format: {processed_dataset_file}. "
+        "Please use safe formats like Parquet (.parquet), CSV (.csv), or JSON/JSONL (.json/.jsonl)."
+    )
   ground_truths = data["output"]
   return ground_truths
 
diff --git a/src/maxtext/inference/mlperf/offline_mode.py b/src/maxtext/inference/mlperf/offline_mode.py
@@ -375,8 +375,20 @@ def main():
   log.info("Mlperf config: %s", args.mlperf_conf)
   log.info("User config: %s", user_conf)
 
   log.info("dataset path: %s", args.dataset_path)
-  dataset = pd.read_pickle(args.dataset_path)
+  # Choose the pandas reader from the file extension; pickle input is rejected rather than unpickled.
+  ext = os.path.splitext(args.dataset_path)[1].lower()
+  if ext == ".parquet":
+    dataset = pd.read_parquet(args.dataset_path)
+  elif ext == ".csv":
+    dataset = pd.read_csv(args.dataset_path)
+  elif ext in (".json", ".jsonl"):
+    dataset = pd.read_json(args.dataset_path, lines=(ext == ".jsonl"))  # JSONL holds one record per line
+  else:
+    raise ValueError(
+        f"Unsupported dataset file format: {args.dataset_path}. "
+        "Please use safe formats like Parquet (.parquet), CSV (.csv), or JSON/JSONL (.json/.jsonl)."
+    )
   if args.rename_dataset_cols:
     rename_dict = json.loads(args.rename_dataset_cols)
     dataset.rename(columns=rename_dict, inplace=True)
diff --git a/src/maxtext/trainers/post_train/distillation/distillation_utils.py b/src/maxtext/trainers/post_train/distillation/distillation_utils.py
@@ -19,7 +19,7 @@
 """
 
 import abc
-import pickle
+import safetensors.numpy
 from typing import Any, Callable, Iterator, List, Literal, Optional, Sequence
 
 import flax
@@ -110,7 +110,7 @@ def __next__(self):
 
     record = self.reader.read()
     self.record_index += 1
-    data = pickle.loads(record)
+    data = safetensors.numpy.load(record)
 
     # Map the arrays to match MaxText's expected dictionary
     batch = {
diff --git a/src/maxtext/trainers/post_train/distillation/save_top_k_teacher_logits.py b/src/maxtext/trainers/post_train/distillation/save_top_k_teacher_logits.py
@@ -24,7 +24,7 @@
 """
 
 import os
-import pickle
+import safetensors.numpy
 from typing import Sequence
 import argparse
 import time
@@ -165,7 +165,7 @@ def generate_and_save_data(config, local_args):
         if key in batch:
           record_dict[key] = jax.device_get(batch[key])
 
-      writer.write(pickle.dumps(record_dict))
+      writer.write(safetensors.numpy.save(record_dict))
 
       if step % 50 == 0:
         max_logging.log(f"Successfully processed step {step} in {time.time() - step_start:.4f}s")
diff --git a/src/maxtext/trainers/post_train/distillation/verify_saved_logits.py b/src/maxtext/trainers/post_train/distillation/verify_saved_logits.py
@@ -25,7 +25,7 @@
 import sys
 
 import argparse
-import pickle
+import safetensors.numpy
 from absl import app
 import tensorflow as tf
 from array_record.python import array_record_module
@@ -57,7 +57,7 @@ def verify_array_records(output_dir, expected_steps, expected_k, expected_keys):
 
     for record_idx in range(num_records_in_file):
       record = reader.read()
-      data = pickle.loads(record)
+      data = safetensors.numpy.load(record)
 
       # Verify all required keys are present
       required_keys = ["tokens", "top_k_logits", "top_k_indices"]
diff --git a/src/maxtext/trainers/pre_train/train_compile.py b/src/maxtext/trainers/pre_train/train_compile.py
@@ -23,7 +23,6 @@
 
 import functools
 import os
-import pickle
 from typing import Sequence
 
 from absl import app
@@ -181,7 +180,7 @@ def save_compiled(compiled, save_name):
   """Serialize and save the compiled function."""
   serialized, _, _ = serialize(compiled)
   with open(save_name, "wb") as f:
-    pickle.dump(serialized, f)
+    f.write(serialized)
 
 
 def is_oom(argv: Sequence[str]) -> bool:
diff --git a/src/maxtext/utils/maxtext_utils.py b/src/maxtext/utils/maxtext_utils.py
@@ -16,7 +16,6 @@
 """Utils that are only interesting to MaxText."""
 
 import functools
-import pickle
 import os
 from typing import Sequence
 
@@ -216,8 +215,7 @@ def load_compiled(config, partial_train, state, execution_devices):
   # Parker is working on a serializing these
   def load_serialized_compiled(save_name):
     with open(save_name, "rb") as f:
-      serialized_compiled = pickle.load(f)
-    return serialized_compiled
+      return f.read()  # file contents are returned as raw bytes; nothing is unpickled
 
   def get_train_input_output_trees(func, input_args, input_kwargs):
     _, in_tree_recreated = jax.tree_util.tree_flatten((input_args, input_kwargs))
diff --git a/tests/post_training/unit/distillation_metrics_test.py b/tests/post_training/unit/distillation_metrics_test.py
@@ -34,7 +34,7 @@
 pytestmark = [pytest.mark.cpu_only, pytest.mark.post_training]
 
 import os
-import pickle
+import safetensors.numpy
 import tempfile
 import unittest
 from typing import List, Optional
@@ -233,7 +233,7 @@ def test_offline_iterator_preserves_packing_fields(self):
     with tempfile.TemporaryDirectory() as tmpdir:
       path = os.path.join(tmpdir, "test.array_record")
       writer = array_record_module.ArrayRecordWriter(path, "group_size:1")
-      writer.write(pickle.dumps(record))
+      writer.write(safetensors.numpy.save(record))
       writer.close()
 
       it = distillation_utils.OfflineArrayRecordIterator(path, epochs=1)
diff --git a/tests/unit/train_compile_test.py b/tests/unit/train_compile_test.py
@@ -20,10 +20,12 @@
 """
 
 from absl.testing import parameterized
+import jax
+from jax.experimental.serialize_executable import serialize
 import os.path
-from tempfile import gettempdir
-
+import pickle
 import pytest
+from tempfile import gettempdir, NamedTemporaryFile
 import transformers
 
 
@@ -1053,3 +1055,41 @@ def test_qwen3_5(self):
             "use_tokamax_splash=True",
         )
     )
+
+  @pytest.mark.cpu_only
+  def test_serialization_and_deserialization_formats(self):
+    """Tests raw-binary save/load round-trips JAX serialized bytes and that legacy pickle files are never unpickled."""
+
+    def load_serialized_compiled_test(save_name):  # local stand-in: reads the file back as raw bytes
+      with open(save_name, "rb") as f:
+        return f.read()
+
+    @jax.jit
+    def add_one(x):
+      return x + 1
+
+    # Compile simply on CPU
+    compiled = add_one.lower(1).compile()
+    serialized, _, _ = serialize(compiled)
+
+    # 1. Save and load JAX compiled step using secure raw binary format
+    with NamedTemporaryFile() as f_secure:
+      with open(f_secure.name, "wb") as f:
+        f.write(serialized)
+
+      loaded_compiled = load_serialized_compiled_test(f_secure.name)
+
+      # Ensure it loaded the correct JAX serialization bytes
+      assert loaded_compiled == serialized
+
+    # 2. Save and load JAX compiled step using legacy pickle format
+    with NamedTemporaryFile() as f_legacy:
+      with open(f_legacy.name, "wb") as f:
+        pickle.dump(serialized, f)
+
+      loaded_legacy = load_serialized_compiled_test(f_legacy.name)
+
+      # Ensure it loaded raw pickled bytes (starting with pickle protocol marker)
+      # and did NOT unpickle them into JAX serialization bytes.
+      assert loaded_legacy.startswith(b"\x80")
+      assert loaded_legacy != serialized
diff --git a/tests/utils/forward_pass_logit_checker.py b/tests/utils/forward_pass_logit_checker.py
@@ -276,14 +276,8 @@ def main(config, test_args):  # pylint: disable=W0621
 
       with jsonlines.open(input_golden_data_path, "r") as f:
         golden_data = list(f)
-    elif input_golden_data_path.suffix in [".pickle", ".pkl"]:
-      max_logging.log("loading hf goldens from pickle file")
-      import pickle  # pylint: disable=import-outside-toplevel
-
-      with open(input_golden_data_path, "rb") as f:
-        golden_data = pickle.load(f)
     else:
-      raise ValueError("golden_logits_path must end with .jsonl or .pickle/.pkl")
+      raise ValueError("golden_logits_path must end with .jsonl")
     max_logging.log(f"loaded {len(golden_data)} golden data points")
     all_data_to_save = []
     for golden_data_index, golden_data_point in enumerate(golden_data):
diff --git a/tests/utils/hf_checkpoint_conversion_checker.py b/tests/utils/hf_checkpoint_conversion_checker.py
@@ -44,7 +44,7 @@ def load_meta(meta_checkpoint_folder):
   meta_tensor = {}
   ckpt_paths = sorted(pathlib.Path(meta_checkpoint_folder).glob("[!.]*.pth"))
   for ckpt_path in ckpt_paths:
-    meta_tensor = torch.load(ckpt_path, map_location="cpu")
+    meta_tensor = torch.load(ckpt_path, map_location="cpu", weights_only=True)
   return meta_tensor
 
 
diff --git a/tools/weight_inspector/weight_inspector.py b/tools/weight_inspector/weight_inspector.py
@@ -24,17 +24,29 @@
 import argparse
 import pickle
 import numpy as np
+from safetensors.torch import load_file
 import torch
+import warnings
 from maxtext.utils import max_logging
 
 
 def inspect_weights(left_path, right_path):
-  """Load the pickle files and compare contents."""
-  with open(left_path, "rb") as file:
-    left_weights = pickle.load(file)
+  """Load the weight files and compare contents."""
 
-  with open(right_path, "rb") as file:
-    right_weights = pickle.load(file)
+  def load_weights_safely(path):
+    if path.endswith(".safetensors"):  # preferred path: read with safetensors.torch.load_file
+      return load_file(path)
+    else:  # any other extension: warn, then fall back to pickle.load
+      warnings.warn(
+          f"Loading pickled weights from {path} is insecure and can lead to Remote Code Execution (RCE). "
+          "Please migrate your weights to safe formats like Safetensors (.safetensors).",
+          UserWarning,
+      )
+      with open(path, "rb") as file:
+        return pickle.load(file)
+
+  left_weights = load_weights_safely(left_path)
+  right_weights = load_weights_safely(right_path)
   assert sorted(left_weights.keys()) == sorted(
       right_weights.keys()
   ), f"Weights structure does not match! {list(set(left_weights.keys()).symmetric_difference(right_weights.keys()))}"