@@ -81,6 +81,15 @@ def process(self, text: str) -> Iterable[tuple[str, dict]]:
8181 yield text , {k : torch .squeeze (v ) for k , v in tokenized .items ()}
8282
8383
class DistilBertForSequenceClassificationCompat(DistilBertForSequenceClassification):
  """DistilBERT sequence classifier that builds its own config on construction.

  Creating the ``DistilBertConfig`` inside the worker process — instead of
  pickling a pre-built config into the pipeline — avoids config drift
  between the launcher environment and the worker runtime.

  Args:
    model_name: HuggingFace model name or path passed to
      ``DistilBertConfig.from_pretrained``.
    num_labels: Number of classification labels (default 2).
  """

  def __init__(self, model_name: str, num_labels: int = 2):
    # Build the config in the worker runtime, then normalize it for
    # cross-version compatibility before handing it to the parent class.
    raw_config = DistilBertConfig.from_pretrained(
        model_name, num_labels=num_labels)
    super().__init__(_ensure_transformers_config_compat(raw_config))
91+
92+
8493class RateLimitDoFn (beam .DoFn ):
8594 def __init__ (self , rate_per_sec : float ):
8695 self .delay = 1.0 / rate_per_sec
@@ -265,12 +274,12 @@ def run(
265274 method = beam .io .WriteToBigQuery .Method .STREAMING_INSERTS
266275 pipeline_options .view_as (StandardOptions ).streaming = True
267276
268- model_config = _ensure_transformers_config_compat (
269- DistilBertConfig .from_pretrained (known_args .model_path , num_labels = 2 ))
270-
271277 model_handler = PytorchModelHandlerKeyedTensor (
272- model_class = DistilBertForSequenceClassification ,
273- model_params = {'config' : model_config },
278+ model_class = DistilBertForSequenceClassificationCompat ,
279+ model_params = {
280+ 'model_name' : known_args .model_path ,
281+ 'num_labels' : 2 ,
282+ },
274283 state_dict_path = known_args .model_state_dict_path ,
275284 device = 'GPU' )
276285
0 commit comments