3131"""
3232
3333# pytype: skip-file
34-
34+ import logging
3535import re
36+ import time
3637from typing import Any
3738from typing import NamedTuple
3839from typing import Optional
@@ -388,7 +389,8 @@ def __init__(
388389 topic : str ,
389390 with_attributes : bool = False ,
390391 id_label : Optional [str ] = None ,
391- timestamp_attribute : Optional [str ] = None ) -> None :
392+ timestamp_attribute : Optional [str ] = None ,
393+ publish_with_ordering_key : bool = False ) -> None :
392394 """Initializes ``WriteToPubSub``.
393395
394396 Args:
@@ -404,9 +406,13 @@ def __init__(
404406 in a ReadFromPubSub PTransform to deduplicate messages.
405407 timestamp_attribute: If set, will set an attribute for each Cloud Pub/Sub
406408 message with the given name and the message's publish time as the value.
409+ publish_with_ordering_key: If True, enables message ordering on the
410+ PublisherClient. Messages with an ordering_key will be delivered
411+ in order. Requires messages to have ordering_key set.
407412 """
408413 super ().__init__ ()
409414 self .with_attributes = with_attributes
415+ self .publish_with_ordering_key = publish_with_ordering_key
410416 self .id_label = id_label
411417 self .timestamp_attribute = timestamp_attribute
412418 self .project , self .topic_name = parse_topic (topic )
@@ -430,7 +436,16 @@ def bytes_to_proto_str(element: Union[bytes, str]) -> bytes:
430436 def expand (self , pcoll ):
431437 # Store pipeline options for use in DoFn
432438 self .pipeline_options = pcoll .pipeline .options if pcoll .pipeline else None
433-
439+ # Warn Dataflow users to use the XLang path for ordering key support,
440+ # since _PubSubWriteDoFn._flush() is not used by Dataflow's implementation.
441+ runner = self .pipeline_options .get_all_options ().get (
442+ 'runner' , '' ) if self .pipeline_options else ''
443+ if 'Dataflow' in str (runner ) and self .publish_with_ordering_key :
444+ logging .warning (
445+ 'WriteToPubSub ordering_key support is not available on Dataflow '
446+ 'via this transform. Use the XLang WriteToPubSub path instead: '
447+ 'apache_beam.io.external.gcp.pubsub.WriteToPubSub with '
448+ 'publish_with_ordering_key=True.' )
434449 if self .with_attributes :
435450 pcoll = pcoll | 'ToProtobufX' >> ParDo (
436451 _AddMetricsAndMap (
@@ -457,6 +472,9 @@ def display_data(self):
457472 True , label = 'With Attributes' ).drop_if_none (),
458473 'timestamp_attribute' : DisplayDataItem (
459474 self .timestamp_attribute , label = 'Timestamp Attribute' ),
475+ 'publish_with_ordering_key' : DisplayDataItem (
476+ self .publish_with_ordering_key ,
477+ label = 'Publish With Ordering Key' ).drop_if_none (),
460478 }
461479
462480
@@ -563,6 +581,7 @@ def __init__(self, transform):
563581 self .id_label = transform .id_label
564582 self .timestamp_attribute = transform .timestamp_attribute
565583 self .with_attributes = transform .with_attributes
584+ self .with_ordering = transform .publish_with_ordering_key
566585
567586 # TODO(https://github.com/apache/beam/issues/18939): Add support for
568587 # id_label and timestamp_attribute.
@@ -597,7 +616,7 @@ def __init__(self, transform):
597616 output_labels_supported = False
598617
599618 # Log debug information for troubleshooting
600- import logging
619+
601620 runner_info = getattr (
602621 pipeline_options , 'runner' ,
603622 'None' ) if pipeline_options else 'No options'
@@ -628,7 +647,13 @@ def __init__(self, transform):
628647
629648 def setup (self ):
630649 from google .cloud import pubsub
631- self ._pub_client = pubsub .PublisherClient ()
650+ if self .with_ordering :
651+ self ._pub_client = pubsub .PublisherClient (
652+ publisher_options = pubsub .types .PublisherOptions (
653+ enable_message_ordering = True ,
654+ ))
655+ else :
656+ self ._pub_client = pubsub .PublisherClient ()
632657 self ._topic = self ._pub_client .topic_path (
633658 self .project , self .short_topic_name )
634659
@@ -647,21 +672,20 @@ def _flush(self):
647672 if not self ._buffer :
648673 return
649674
650- import time
651-
652675 # The elements in buffer are serialized protobuf bytes from the previous
653676 # transforms. We need to deserialize them to extract data and attributes.
654677 futures = []
655678 for elem in self ._buffer :
656679 # Deserialize the protobuf to get the original PubsubMessage
657680 pubsub_msg = PubsubMessage ._from_proto_str (elem )
658681
659- # Publish with the correct data and attributes
682+ # Publish with the correct data, attributes, and ordering_key
683+ kwargs = {}
660684 if self .with_attributes and pubsub_msg .attributes :
661- future = self . _pub_client . publish (
662- self . _topic , pubsub_msg .data , ** pubsub_msg . attributes )
663- else :
664- future = self ._pub_client .publish (self ._topic , pubsub_msg .data )
685+ kwargs . update ( pubsub_msg . attributes )
686+ if pubsub_msg .ordering_key :
687+ kwargs [ 'ordering_key' ] = pubsub_msg . ordering_key
688+ future = self ._pub_client .publish (self ._topic , pubsub_msg .data , ** kwargs )
665689
666690 futures .append (future )
667691
0 commit comments