apache
diff --git a/‎amber/src/main/python/core/architecture/packaging/output_manager.py‎
Lines changed: 52 additions & 55 deletions b/‎amber/src/main/python/core/architecture/packaging/output_manager.py‎
Lines changed: 52 additions & 55 deletions
diff --git a/‎amber/src/main/python/core/models/payload.py‎
Lines changed: 8 additions & 0 deletions b/‎amber/src/main/python/core/models/payload.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎amber/src/main/python/core/models/state.py‎
Lines changed: 40 additions & 3 deletions b/‎amber/src/main/python/core/models/state.py‎
Lines changed: 40 additions & 3 deletions
diff --git a/‎amber/src/main/python/core/runnables/network_receiver.py‎
Lines changed: 5 additions & 1 deletion b/‎amber/src/main/python/core/runnables/network_receiver.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎amber/src/main/python/core/runnables/network_sender.py‎
Lines changed: 13 additions & 2 deletions b/‎amber/src/main/python/core/runnables/network_sender.py‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎amber/src/main/python/core/storage/runnables/input_port_materialization_reader_runnable.py‎
Lines changed: 15 additions & 2 deletions b/‎amber/src/main/python/core/storage/runnables/input_port_materialization_reader_runnable.py‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎amber/src/main/scala/org/apache/texera/amber/engine/architecture/messaginglayer/OutputManager.scala‎
Lines changed: 1 addition & 1 deletion b/‎amber/src/main/scala/org/apache/texera/amber/engine/architecture/messaginglayer/OutputManager.scala‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎amber/src/main/scala/org/apache/texera/amber/engine/architecture/pythonworker/PythonProxyClient.scala‎
Lines changed: 1 addition & 1 deletion b/‎amber/src/main/scala/org/apache/texera/amber/engine/architecture/pythonworker/PythonProxyClient.scala‎
Lines changed: 1 addition & 1 deletion
@@ -133,47 +133,31 @@ def set_up_port_storage_writer(self, port_id: PortIdentity, storage_uri_base: st
         state materialization on the same port. `storage_uri_base` is the
         port's base URI; the result and state URIs are derived from it.
         """
-        document, _ = DocumentFactory.open_document(
-            VFSURIFactory.result_uri(storage_uri_base)
-        )
-        buffered_item_writer = document.writer(str(get_worker_index(self.worker_id)))
-        writer_queue = Queue()
-        port_storage_writer = PortStorageWriter(
-            buffered_item_writer=buffered_item_writer, queue=writer_queue
-        )
-        writer_thread = threading.Thread(
-            target=port_storage_writer.run,
-            daemon=True,
-            name=f"port_storage_writer_thread_{port_id}",
-        )
-        writer_thread.start()
-        self._port_storage_writers[port_id] = (
-            writer_queue,
-            port_storage_writer,
-            writer_thread,
-        )
 
-        state_document, _ = DocumentFactory.open_document(
-            VFSURIFactory.state_uri(storage_uri_base)
-        )
-        state_buffered_item_writer = state_document.writer(
-            str(get_worker_index(self.worker_id))
-        )
-        state_writer_queue = Queue()
-        state_port_writer = PortStorageWriter(
-            buffered_item_writer=state_buffered_item_writer,
-            queue=state_writer_queue,
-        )
-        state_writer_thread = threading.Thread(
-            target=state_port_writer.run,
-            daemon=True,
-            name=f"port_state_writer_thread_{port_id}",
+        # Shared setup used for both the result writer and the state writer
+        # of this port: open the document at `uri`, wrap its buffered item
+        # writer in a PortStorageWriter fed by a fresh queue, run that writer
+        # on a daemon thread named "<name_prefix>_<port_id>", and record the
+        # (queue, writer, thread) triple in `registry` under `port_id`.
+        def start_writer(uri: str, name_prefix: str, registry: dict) -> None:
+            document, _ = DocumentFactory.open_document(uri)
+            writer_queue = Queue()
+            writer = PortStorageWriter(
+                # The buffered writer is requested under this worker's index.
+                buffered_item_writer=document.writer(
+                    str(get_worker_index(self.worker_id))
+                ),
+                queue=writer_queue,
+            )
+            thread = threading.Thread(
+                target=writer.run, daemon=True, name=f"{name_prefix}_{port_id}"
+            )
+            thread.start()
+            # The triple layout (queue, writer, thread) is what the save and
+            # close paths unpack when they iterate the registry values.
+            registry[port_id] = (writer_queue, writer, thread)
+
+        start_writer(
+            VFSURIFactory.result_uri(storage_uri_base),
+            "port_storage_writer_thread",
+            self._port_storage_writers,
         )
-        state_writer_thread.start()
-        self._port_state_writers[port_id] = (
-            state_writer_queue,
-            state_port_writer,
-            state_writer_thread,
+        start_writer(
+            VFSURIFactory.state_uri(storage_uri_base),
+            "port_state_writer_thread",
+            self._port_state_writers,
         )
 
     def get_port(self, port_id=None) -> WorkerPort:
@@ -203,14 +187,22 @@ def save_tuple_to_storage_if_needed(self, tuple_: Tuple, port_id=None) -> None:
                 PortStorageWriterElement(data_tuple=tuple_)
             )
 
-    def save_state_to_storage_if_needed(self, state: State, port_id=None) -> None:
+    def save_state_to_storage_if_needed(
+        self,
+        state: State,
+        loop_counter: int = 0,
+        loop_start_id: str = "",
+        port_id=None,
+    ) -> None:
         # When port_id is omitted the same state row is fanned out to
         # every output port's state table. This mirrors the
         # broadcast-to-all-workers behavior on the emit side: state is
         # shared context, not per-key data, so every downstream operator
         # (and every worker reading the materialization) needs the full
         # set.
-        element = PortStorageWriterElement(data_tuple=state.to_tuple())
+        element = PortStorageWriterElement(
+            data_tuple=state.to_tuple(loop_counter, loop_start_id)
+        )
         if port_id is None:
             for writer_queue, _, _ in self._port_state_writers.values():
                 writer_queue.put(element)
@@ -223,18 +215,16 @@ def close_port_storage_writers(self) -> None:
         writer threads to finish, which indicates that all port storage
         writes are finished.
         """
-        for _, writer, _ in self._port_storage_writers.values():
-            # This non-blocking stop call will let the storage writers
-            # flush the remaining buffer
-            writer.stop()
-        for _, _, writer_thread in self._port_storage_writers.values():
-            # This blocking call will wait for all the writer to finish commit
-            writer_thread.join()
-        for _, state_writer, _ in self._port_state_writers.values():
-            state_writer.stop()
-        for _, _, state_writer_thread in self._port_state_writers.values():
-            state_writer_thread.join()
-        self._port_state_writers.clear()
+        for registry in (self._port_storage_writers, self._port_state_writers):
+            # Non-blocking stop lets each writer flush its remaining buffer;
+            # the join then waits for the commit to finish.
+            for _, writer, _ in registry.values():
+                writer.stop()
+            for _, _, thread in registry.values():
+                thread.join()
+            # Drop the stopped writers so a later close doesn't act on
+            # stale entries.
+            registry.clear()
 
     def add_partitioning(self, tag: PhysicalLink, partitioning: Partitioning) -> None:
         """
@@ -290,15 +280,22 @@ def emit_ecm(
         )
 
     def emit_state(
-        self, state: State
+        self,
+        state: State,
+        loop_counter: int = 0,
+        loop_start_id: str = "",
     ) -> Iterable[typing.Tuple[ActorVirtualIdentity, DataPayload]]:
         return chain(
             *(
                 (
                     (
                         receiver,
                         (
-                            StateFrame(payload)
+                            StateFrame(
+                                payload,
+                                loop_counter=loop_counter,
+                                loop_start_id=loop_start_id,
+                            )
                             if isinstance(payload, State)
                             else self.tuple_to_frame(payload)
                         ),
 
@@ -34,3 +34,11 @@ class DataFrame(DataPayload):
 @dataclass
 class StateFrame(DataPayload):
+    # The State payload carried by this frame.
     frame: State
+    # Loop-control bookkeeping owned by the worker runtime, carried alongside
+    # the State payload (not inside it) so it never collides with user state.
+    # Defaults are the "no loop" values for all non-loop state.
+    loop_counter: int = 0
+    # Which LoopStart to jump back to. Set by the runtime on a LoopStart's
+    # output, consumed by the matching LoopEnd. Empty for non-loop /
+    # not-yet-stamped state.
+    loop_start_id: str = ""
@@ -25,13 +25,50 @@
 
 class State(dict):
     CONTENT = "content"
-    SCHEMA = Schema(raw_schema={CONTENT: "STRING"})
+    # Loop-control bookkeeping owned by the worker runtime, NOT user state -- it
+    # never appears in the content JSON. In memory it rides on the StateFrame
+    # envelope; it is materialized/serialized as its own column (parallel to
+    # content) by to_tuple(...). from_tuple() returns the bare State; callers
+    # that need these values read the corresponding columns off the tuple.
+    LOOP_COUNTER = "loop_counter"
+    LOOP_START_ID = "loop_start_id"
+    SCHEMA = Schema(
+        raw_schema={
+            CONTENT: "STRING",
+            LOOP_COUNTER: "LONG",
+            LOOP_START_ID: "STRING",
+        }
+    )
 
     def to_json(self) -> str:
         return json.dumps(_to_json_value(self), separators=(",", ":"))
 
-    def to_tuple(self) -> Tuple:
-        return Tuple({State.CONTENT: self.to_json()}, schema=State.SCHEMA)
+    @staticmethod
+    def to_columns(
+        content_json: str,
+        loop_counter: int = 0,
+        loop_start_id: str = "",
+    ) -> dict:
+        """The single column-name -> value mapping for the State wire/storage
+        format. Both ``to_tuple`` (iceberg materialization) and the network
+        sender build from this, so adding a column is a one-line change here
+        rather than in every serializer.
+
+        ``content_json`` is stored under ``CONTENT``, ``loop_counter``
+        (passed through ``int``) under ``LOOP_COUNTER``, and
+        ``loop_start_id`` under ``LOOP_START_ID``.
+        """
+        return {
+            State.CONTENT: content_json,
+            # NOTE(review): the int(...) coercion presumably keeps the value
+            # compatible with the LONG column declared in SCHEMA -- confirm.
+            State.LOOP_COUNTER: int(loop_counter),
+            State.LOOP_START_ID: loop_start_id,
+        }
+
+    def to_tuple(
+        self,
+        loop_counter: int = 0,
+        loop_start_id: str = "",
+    ) -> Tuple:
+        """Build the ``State.SCHEMA`` tuple for this state.
+
+        The content column holds ``self.to_json()``; ``loop_counter`` and
+        ``loop_start_id`` fill their own columns and default to 0 and "".
+        """
+        return Tuple(
+            State.to_columns(self.to_json(), loop_counter, loop_start_id),
+            schema=State.SCHEMA,
+        )
 
     @classmethod
     def from_json(cls, payload: str) -> "State":
 
@@ -96,7 +96,11 @@ def data_handler(command: bytes, table: Table) -> int:
                 "Data",
                 lambda _: DataFrame(table),
                 "State",
-                lambda _: StateFrame(State.from_json(table[State.CONTENT][0].as_py())),
+                lambda _: StateFrame(
+                    State.from_json(table[State.CONTENT][0].as_py()),
+                    loop_counter=int(table[State.LOOP_COUNTER][0].as_py()),
+                    loop_start_id=table[State.LOOP_START_ID][0].as_py(),
+                ),
                 "ECM",
                 lambda _: EmbeddedControlMessage().parse(table["payload"][0].as_py()),
             )
 
@@ -20,7 +20,13 @@
 from overrides import overrides
 from typing import Optional
 
-from core.models import DataPayload, InternalQueue, DataFrame, State, StateFrame
+from core.models import (
+    DataPayload,
+    InternalQueue,
+    DataFrame,
+    State,
+    StateFrame,
+)
 from core.models.internal_queue import (
     InternalQueueElement,
     DataElement,
@@ -99,8 +105,13 @@ def _send_data(self, to: ChannelIdentity, data_payload: DataPayload) -> None:
             self._proxy_client.send_data(bytes(data_header), data_payload.frame)
         elif isinstance(data_payload, StateFrame):
             data_header = PythonDataHeader(tag=to, payload_type="State")
+            columns = State.to_columns(
+                data_payload.frame.to_json(),
+                data_payload.loop_counter,
+                data_payload.loop_start_id,
+            )
             table = pa.Table.from_pydict(
-                {State.CONTENT: [data_payload.frame.to_json()]},
+                {name: [value] for name, value in columns.items()},
                 schema=State.SCHEMA.as_arrow_schema(),
             )
             self._proxy_client.send_data(bytes(data_header), table)
 
@@ -34,7 +34,14 @@
 from core.architecture.sendsemantics.round_robin_partitioner import (
     RoundRobinPartitioner,
 )
-from core.models import Tuple, InternalQueue, DataFrame, DataPayload, State, StateFrame
+from core.models import (
+    Tuple,
+    InternalQueue,
+    DataFrame,
+    DataPayload,
+    State,
+    StateFrame,
+)
 from core.models.internal_queue import DataElement, ECMElement
 from core.storage.document_factory import DocumentFactory
 from core.storage.vfs_uri_factory import VFSURIFactory
@@ -152,7 +159,13 @@ def run(self) -> None:
                 VFSURIFactory.state_uri(self.uri)
             )
             for state_row in state_document.get():
-                self.emit_payload(StateFrame(State.from_tuple(state_row)))
+                self.emit_payload(
+                    StateFrame(
+                        State.from_tuple(state_row),
+                        loop_counter=state_row[State.LOOP_COUNTER],
+                        loop_start_id=state_row[State.LOOP_START_ID],
+                    )
+                )
 
             storage_iterator = self.materialization.get()
             # Iterate and process tuples.
 
@@ -242,7 +242,7 @@ class OutputManager(
     // emit side: state is shared context, not per-key data, so every
     // downstream operator (and every worker reading the materialization)
     // needs the full set.
-    stateWriterThreads.values.foreach(_.queue.put(Left(state.toTuple)))
+    stateWriterThreads.values.foreach(_.queue.put(Left(state.toTuple())))
   }
 
   /**
 
@@ -125,7 +125,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu
       case DataFrame(frame) =>
         writeArrowStream(mutable.Queue(ArraySeq.unsafeWrapArray(frame): _*), from, "Data")
       case StateFrame(state) =>
-        writeArrowStream(mutable.Queue(state.toTuple), from, "State")
+        writeArrowStream(mutable.Queue(state.toTuple()), from, "State")
     }
   }
Original file line number	Diff line number	Diff line change
`@@ -242,7 +242,7 @@ class OutputManager(`
`242`	`242`	`// emit side: state is shared context, not per-key data, so every`
`243`	`243`	`// downstream operator (and every worker reading the materialization)`
`244`	`244`	`// needs the full set.`
`245`		`- stateWriterThreads.values.foreach(_.queue.put(Left(state.toTuple)))`
	`245`	`+ stateWriterThreads.values.foreach(_.queue.put(Left(state.toTuple())))`
`246`	`246`	`}`
`247`	`247`
`248`	`248`	`/**`
Original file line number	Diff line number	Diff line change
`@@ -125,7 +125,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu`
`125`	`125`	`case DataFrame(frame) =>`
`126`	`126`	`writeArrowStream(mutable.Queue(ArraySeq.unsafeWrapArray(frame): _*), from, "Data")`
`127`	`127`	`case StateFrame(state) =>`
`128`		`- writeArrowStream(mutable.Queue(state.toTuple), from, "State")`
	`128`	`+ writeArrowStream(mutable.Queue(state.toTuple()), from, "State")`
`129`	`129`	`}`
`130`	`130`	`}`
`131`	`131`