apache · Amar3tto · Mar 18, 2026 · Mar 16, 2026
diff --git a/...oad-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt b/...oad-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_VLLM_Gemma_Batch.txt
@@ -32,5 +32,6 @@
 --metrics_table=gemma_vllm_batch
 --influx_measurement=gemma_vllm_batch
 --model_gcs_path=gs://apache-beam-ml/models/gemma-2b-it
+--requirements_file=apache_beam/ml/inference/vllm_tests_requirements.txt
 --dataflow_service_options=worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver
 --experiments=use_runner_v2
diff --git a/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py b/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py
@@ -52,8 +52,7 @@ def tokenize_sentence(
     text_and_mask: tuple[str, str],
     bert_tokenizer: BertTokenizer) -> tuple[str, dict[str, torch.Tensor]]:
   text, masked_text = text_and_mask
-  tokenized_sentence = bert_tokenizer.encode_plus(
-      masked_text, return_tensors="pt")
+  tokenized_sentence = bert_tokenizer(masked_text, return_tensors="pt")
 
   # Workaround to manually remove batch dim until we have the feature to
   # add optional batching flag.

diff --git a/sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py b/sdks/python/apache_beam/examples/inference/vllm_gemma_batch.py
@@ -103,7 +103,6 @@ def run(argv=None, save_main_session=True, test_pipeline=None):
 
   gem = opts.view_as(GemmaVLLMOptions)
   opts.view_as(SetupOptions).save_main_session = save_main_session
-
   logging.info("Pipeline starting with model path: %s", gem.model_gcs_path)
   handler = GcsVLLMCompletionsModelHandler(
       model_name=gem.model_gcs_path,

diff --git a/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/vllm_tests_requirements.txt
@@ -17,6 +17,7 @@
 torch>=1.7.1
 torchvision>=0.8.2
 pillow>=8.0.0
-transformers>=4.18.0
+transformers==4.57.1
+sentencepiece==0.2.1
 google-cloud-monitoring>=2.27.0
 openai>=1.52.2