Commit aabf9ae

feat(pipeline): factor arithmetic, cross-sectional transforms, lazy executor (#502 phase 2c+2d)
- Factor arithmetic: __neg__, __add__/__radd__, __sub__/__rsub__, __mul__/__rmul__, __truediv__/__rtruediv__, with scalar auto-coercion via _Constant
- Cross-sectional transforms: Factor.zscore(mask=), Factor.demean(mask=), Factor.winsorize(lower, upper, mask=), all per-bar via over('datetime')
- VectorPipelineEngine(lazy=True) routes the post-factor universe filter + sort through Polars' streaming engine for memory-bound runs
- Document stateless / serverless guarantees for AWS Lambda + Azure Functions deployments (per-evaluation contextvar cache, no cross-invocation state)
- 9 new tests covering arithmetic, transforms, lazy/eager equivalence, and the stateless cache lifecycle
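The scalar auto-coercion in the first bullet can be illustrated with a toy expression tree. This is a simplified stand-in for the framework's `Factor`/`_Constant` classes, not the real implementation; all class and function names here are illustrative:

```python
# Toy expression tree: __add__/__radd__ wrap scalars in a Constant
# node so both ``f + 1`` and ``1 + f`` build the same kind of tree.
class Node:
    def evaluate(self, row):
        raise NotImplementedError

    def __add__(self, other):
        return Add(self, _coerce(other))

    def __radd__(self, other):
        # Reflected form: Python calls this for ``1 + node``.
        return Add(_coerce(other), self)

    def __neg__(self):
        return Neg(self)


class Constant(Node):
    def __init__(self, value):
        self.value = float(value)

    def evaluate(self, row):
        return self.value


class Column(Node):
    def __init__(self, name):
        self.name = name

    def evaluate(self, row):
        return row[self.name]


class Add(Node):
    def __init__(self, left, right):
        self.left, self.right = left, right

    def evaluate(self, row):
        return self.left.evaluate(row) + self.right.evaluate(row)


class Neg(Node):
    def __init__(self, base):
        self.base = base

    def evaluate(self, row):
        return -self.base.evaluate(row)


def _coerce(operand):
    # Mirrors the idea of scalar auto-coercion: scalars become Constant.
    if isinstance(operand, Node):
        return operand
    if isinstance(operand, (int, float)):
        return Constant(operand)
    raise TypeError(type(operand).__name__)


returns = Column("returns")
expr = 1 + (-returns)            # __radd__ and __neg__ build the tree
value = expr.evaluate({"returns": 0.25})  # 1 + (-0.25)
```

Building the tree never evaluates anything; evaluation happens only when `evaluate` is called with a row, which is the same deferred-composition idea the commit applies to factors.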
1 parent 48bf3e6 commit aabf9ae

4 files changed

Lines changed: 657 additions & 0 deletions

File tree

docusaurus/docs/Advanced Concepts/pipelines-live.md

Lines changed: 24 additions & 0 deletions

@@ -42,6 +42,30 @@ If you want to experiment, the same example covers both:

The same `Pipeline` subclasses you use in backtests run live.

## Stateless / serverless deployment (AWS Lambda, Azure Functions)

Live trading is frequently deployed on **AWS Lambda** or **Azure Functions** via the framework's stateless mode (see `investing_algorithm_framework/cli/deploy_to_aws_lambda.py` and `deploy_to_azure_function.py`). The pipeline runtime is designed to be safe in those environments:

- **No cross-invocation state.** Pipelines hold no module-level mutable state. Each call to `PipelineEngine.evaluate(...)` (event mode) and `VectorPipelineEngine.evaluate_window(...)` (vector mode) builds a fresh panel and a fresh result frame.
- **Per-evaluation cache, scoped via `contextvars`.** The shared sub-expression cache used by composite factors (e.g. `r + r.zscore()` reusing `r`'s computation) lives in a `ContextVar` that is installed at the start of each `evaluate` call and reset in a `finally` block. A warm Lambda / Functions container that reuses the process between invocations sees a clean cache every time.
- **Pure factor composition.** `Factor.zscore()`, `demean()`, `winsorize()`, and arithmetic (`+ - * /`, unary `-`) all return new factor objects without mutating their inputs. Building a pipeline is a pure operation, so it is safe to construct pipelines at module load time on a Lambda/Functions cold start.
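The cache lifecycle in the second bullet can be sketched in plain Python. This is a standalone illustration of the pattern, not the framework's actual implementation; `compute_factor` and `evaluate` are hypothetical stand-ins:

```python
import contextvars

# Per-evaluation cache: a ContextVar holding a dict, installed at the
# start of evaluate() and reset in a finally block, so a warm container
# never sees entries from a previous invocation.
_EVAL_CACHE = contextvars.ContextVar("eval_cache")

calls = []  # records how often the "expensive" factor really computes


def compute_factor(name):
    """Memoised within a single evaluate() call only."""
    cache = _EVAL_CACHE.get()
    if name not in cache:
        calls.append(name)       # the real work would happen here
        cache[name] = 42.0
    return cache[name]


def evaluate():
    token = _EVAL_CACHE.set({})  # fresh cache per invocation
    try:
        # A composite like ``r + r.zscore()`` touches ``r`` twice but
        # computes it once, thanks to the shared cache.
        return compute_factor("r") + compute_factor("r")
    finally:
        _EVAL_CACHE.reset(token)  # warm container => clean slate


evaluate()
evaluate()
print(calls)  # prints ['r', 'r'] -- once per invocation, not per use
```

The `finally` reset is what makes the warm-container guarantee hold even when an evaluation raises.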
## Want to help?

Track or comment on the implementation issue:

investing_algorithm_framework/domain/pipeline/factor.py

Lines changed: 285 additions & 0 deletions
@@ -124,6 +124,76 @@ def bottom(self, n: int) -> "Filter":
        from .filter import _BottomN
        return _BottomN(self, n)

    # ------------------------------------------------------------------ #
    # Cross-sectional transforms (Phase 2 / #502)
    # ------------------------------------------------------------------ #
    def zscore(self, mask: Optional["Filter"] = None) -> "Factor":
        """Cross-sectional z-score within each timestamp.

        Returns ``(x - mean) / std`` computed over the symbols at each
        bar. With ``mask``, symbols outside the mask are excluded from
        the mean/std and receive ``null`` in the output.
        """
        return _Zscore(self, mask=mask)

    def demean(self, mask: Optional["Filter"] = None) -> "Factor":
        """Cross-sectional mean removal within each timestamp.

        Returns ``x - mean(x)`` computed over the symbols at each bar.
        With ``mask``, symbols outside the mask are excluded from the
        mean and receive ``null`` in the output.
        """
        return _Demean(self, mask=mask)

    def winsorize(
        self,
        lower: float = 0.01,
        upper: float = 0.99,
        mask: Optional["Filter"] = None,
    ) -> "Factor":
        """Cross-sectional winsorisation within each timestamp.

        Clips values below the ``lower`` quantile and above the
        ``upper`` quantile (computed per bar). Both bounds are in
        ``[0, 1]`` with ``lower < upper``.
        """
        if not (0.0 <= lower < upper <= 1.0):
            raise ValueError(
                f"winsorize requires 0 <= lower < upper <= 1, "
                f"got lower={lower}, upper={upper}"
            )
        return _Winsorize(self, lower=lower, upper=upper, mask=mask)

    # ------------------------------------------------------------------ #
    # Arithmetic (Phase 2 / #502) — composes Factors into expression trees
    # ------------------------------------------------------------------ #
    def __neg__(self) -> "Factor":
        return _UnaryOp(self, op="neg")

    def __add__(self, other) -> "Factor":
        return _BinaryOp(self, other, op="add")

    def __radd__(self, other) -> "Factor":
        return _BinaryOp(other, self, op="add")

    def __sub__(self, other) -> "Factor":
        return _BinaryOp(self, other, op="sub")

    def __rsub__(self, other) -> "Factor":
        return _BinaryOp(other, self, op="sub")

    def __mul__(self, other) -> "Factor":
        return _BinaryOp(self, other, op="mul")

    def __rmul__(self, other) -> "Factor":
        return _BinaryOp(other, self, op="mul")

    def __truediv__(self, other) -> "Factor":
        return _BinaryOp(self, other, op="div")

    def __rtruediv__(self, other) -> "Factor":
        return _BinaryOp(other, self, op="div")

    # ------------------------------------------------------------------ #
    # Repr
    # ------------------------------------------------------------------ #
@@ -180,3 +250,218 @@ def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
            .alias("__rank__")
        )
        return ranked["__rank__"]


# --------------------------------------------------------------------- #
# Phase 2 expression-tree wrappers (#502): arithmetic + cross-sectional
# transforms. Each wrapper composes existing factors into a new factor
# without losing the per-evaluation cache (they call ``evaluate`` on
# their children, not ``compute_panel``).
# --------------------------------------------------------------------- #
def _coerce_operand(operand) -> "Factor":
    """Wrap a scalar operand in a :class:`_Constant` so binary ops
    can treat ``factor + 1`` and ``factor + other_factor`` uniformly.
    """
    if isinstance(operand, Factor):
        return operand
    if isinstance(operand, (int, float)):
        return _Constant(float(operand))
    raise TypeError(
        f"Unsupported operand type for Factor arithmetic: "
        f"{type(operand).__name__}"
    )


class _Constant(Factor):
    """A panel-aligned constant series. Window is 1 (no warmup needed)."""

    inputs: List[str] = []

    def __init__(self, value: float) -> None:
        super().__init__(window=1)
        self._value = float(value)

    def required_columns(self) -> List[str]:
        return []

    def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
        return pl.Series(
            "__const__", [self._value] * panel.height, dtype=pl.Float64
        )


class _UnaryOp(Factor):
    """Element-wise unary op (currently only ``neg``)."""

    def __init__(self, base: Factor, op: str) -> None:
        super().__init__(window=base.required_window())
        self._base = base
        self._op = op
        self.inputs = list(base.required_columns())

    def required_columns(self) -> List[str]:
        return list(self.inputs)

    def required_window(self) -> int:
        return int(self.window)

    def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
        values = self._base.evaluate(panel)
        if self._op == "neg":
            return (-values).rename("__unary__")
        raise ValueError(f"Unknown unary op: {self._op}")  # pragma: no cover


class _BinaryOp(Factor):
    """Element-wise binary arithmetic between two ``Factor``s.

    Either operand may be a scalar; it is auto-wrapped in
    :class:`_Constant`.
    """

    def __init__(self, left, right, op: str) -> None:
        left_f = _coerce_operand(left)
        right_f = _coerce_operand(right)
        super().__init__(
            window=max(
                left_f.required_window(), right_f.required_window()
            )
        )
        self._left = left_f
        self._right = right_f
        self._op = op
        cols: List[str] = list(left_f.required_columns())
        for c in right_f.required_columns():
            if c not in cols:
                cols.append(c)
        self.inputs = cols

    def required_columns(self) -> List[str]:
        return list(self.inputs)

    def required_window(self) -> int:
        return int(self.window)

    def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
        left = self._left.evaluate(panel)
        right = self._right.evaluate(panel)
        if self._op == "add":
            out = left + right
        elif self._op == "sub":
            out = left - right
        elif self._op == "mul":
            out = left * right
        elif self._op == "div":
            # Polars naturally yields nulls when the divisor is null;
            # division by zero produces inf which we leave as-is so
            # callers can decide what to do (e.g. ``zscore`` will
            # propagate inf and downstream filters can drop it).
            out = left / right
        else:
            raise ValueError(  # pragma: no cover
                f"Unknown binary op: {self._op}"
            )
        return out.rename("__binop__")


class _CrossSectionalTransform(Factor):
    """Common base for per-bar transforms (zscore / demean / winsorize).

    Subclasses implement :meth:`_transform_expr`, which returns a
    Polars expression over the (possibly mask-nulled) factor values.
    The base class handles mask application and per-``datetime``
    grouping.
    """

    def __init__(
        self,
        base: Factor,
        mask: Optional["Filter"] = None,
    ) -> None:
        super().__init__(window=base.required_window())
        self._base = base
        self._mask = mask
        cols = list(base.required_columns())
        if mask is not None:
            for c in mask.required_columns():
                if c not in cols:
                    cols.append(c)
            self.window = max(self.window, mask.required_window())
        self.inputs = cols

    def required_columns(self) -> List[str]:
        return list(self.inputs)

    def required_window(self) -> int:
        return int(self.window)

    def _transform_expr(self) -> pl.Expr:
        raise NotImplementedError  # pragma: no cover

    def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
        values = self._base.evaluate(panel)
        df = panel.select(["datetime", "symbol"]).with_columns(
            values.alias("__x__")
        )
        if self._mask is not None:
            mask_values = self._mask.evaluate(panel)
            df = df.with_columns(
                pl.when(mask_values)
                .then(pl.col("__x__"))
                .otherwise(None)
                .alias("__x__")
            )
        df = df.with_columns(self._transform_expr().alias("__out__"))
        return df["__out__"]


class _Zscore(_CrossSectionalTransform):
    """Cross-sectional z-score per bar."""

    def _transform_expr(self) -> pl.Expr:
        x = pl.col("__x__")
        mean = x.mean().over("datetime")
        std = x.std().over("datetime")
        # If std is 0 or null, returning null is the safe choice (it
        # signals "no dispersion" rather than producing inf/NaN that
        # poisons downstream rolling stats).
        return (
            pl.when((std == 0) | std.is_null())
            .then(None)
            .otherwise((x - mean) / std)
        )


class _Demean(_CrossSectionalTransform):
    """Cross-sectional mean removal per bar."""

    def _transform_expr(self) -> pl.Expr:
        x = pl.col("__x__")
        return x - x.mean().over("datetime")


class _Winsorize(_CrossSectionalTransform):
    """Cross-sectional clip-to-quantiles per bar."""

    def __init__(
        self,
        base: Factor,
        lower: float,
        upper: float,
        mask: Optional["Filter"] = None,
    ) -> None:
        super().__init__(base=base, mask=mask)
        self._lower = float(lower)
        self._upper = float(upper)

    def _transform_expr(self) -> pl.Expr:
        x = pl.col("__x__")
        lo = x.quantile(self._lower).over("datetime")
        hi = x.quantile(self._upper).over("datetime")
        return (
            pl.when(x < lo)
            .then(lo)
            .when(x > hi)
            .then(hi)
            .otherwise(x)
        )
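The per-bar z-score semantics, including the zero-dispersion-yields-null rule in `_Zscore`, can be shown in a minimal pure-Python sketch. This stands in for the Polars logic; the row layout and function name are illustrative, not the framework's API:

```python
import math
from collections import defaultdict

def zscore_per_bar(rows):
    """rows: list of (datetime, symbol, value), value possibly None.

    Returns {(datetime, symbol): z_or_None}. Mirrors the rule above:
    zero or undefined dispersion yields None (null), never inf/NaN.
    """
    # Collect the non-null cross-section for each bar.
    by_bar = defaultdict(list)
    for dt, _, x in rows:
        if x is not None:
            by_bar[dt].append(x)

    out = {}
    for dt, sym, x in rows:
        xs = by_bar[dt]
        if x is None or len(xs) < 2:
            out[(dt, sym)] = None  # masked value or undefined std
            continue
        mean = sum(xs) / len(xs)
        # Sample std (ddof=1), matching Polars' Series.std default.
        std = math.sqrt(sum((v - mean) ** 2 for v in xs) / (len(xs) - 1))
        out[(dt, sym)] = None if std == 0 else (x - mean) / std
    return out

rows = [
    ("t0", "BTC", 1.0), ("t0", "ETH", 3.0),  # dispersion: z = -/+ 1/sqrt(2)
    ("t1", "BTC", 5.0), ("t1", "ETH", 5.0),  # std == 0: both null
]
result = zscore_per_bar(rows)
```

Note the ddof=1 choice: Polars' `std` defaults to the sample standard deviation, so a sketch using population std would produce different z-scores on small cross-sections.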

investing_algorithm_framework/services/pipeline/vector_pipeline_engine.py

Lines changed: 41 additions & 0 deletions
@@ -42,8 +42,21 @@ class VectorPipelineEngine:

    Universe filtering is applied as in event mode — symbols failing
    the universe mask at a given bar are dropped from that bar's
    output, and the universe column itself is not exposed.

    Args:
        lazy: If ``True``, the result frame is assembled via
            :class:`polars.LazyFrame` and collected with the streaming
            engine at the end. Useful for memory-bound runs over large
            universes. Built-in factors are still computed eagerly per
            symbol (each ``Factor.compute_panel`` returns a ``Series``);
            only the ``with_columns`` / ``filter`` / ``sort`` pipeline
            on the wide result frame is deferred. Default ``False``
            preserves Phase 2a behaviour exactly.
    """

    def __init__(self, lazy: bool = False) -> None:
        self._lazy = bool(lazy)

    # ------------------------------------------------------------------ #
    # Panel construction (delegates to event engine for parity)
    # ------------------------------------------------------------------ #
@@ -130,13 +143,41 @@ def evaluate_panel(
            if universe is not None:
                mask = universe.evaluate(panel)
                result = result.with_columns(mask.alias("__universe__"))
                if self._lazy:
                    # Stream the filter + drop + sort through Polars'
                    # streaming engine so memory usage stays bounded
                    # on large universes.
                    return self._collect_lazy(
                        result.lazy()
                        .filter(pl.col("__universe__"))
                        .drop("__universe__")
                        .sort(["datetime", "symbol"])
                    )
                result = result.filter(pl.col("__universe__"))
                result = result.drop("__universe__")
        finally:
            _EVAL_CACHE.reset(token)

        if self._lazy:
            return self._collect_lazy(
                result.lazy().sort(["datetime", "symbol"])
            )
        return result.sort(["datetime", "symbol"])

    @staticmethod
    def _collect_lazy(lazy: pl.LazyFrame) -> pl.DataFrame:
        """Collect a :class:`polars.LazyFrame` with the streaming
        engine when available; fall back to a default collect on
        older Polars versions that don't accept
        ``engine="streaming"``.
        """
        try:
            return lazy.collect(engine="streaming")
        except TypeError:
            return lazy.collect()
        except Exception:  # pragma: no cover - polars version drift
            return lazy.collect()

    # ------------------------------------------------------------------ #
    # Slicing helpers
    # ------------------------------------------------------------------ #
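The version-compatibility fallback in `_collect_lazy` is an instance of a general pattern: try the newer keyword argument, and fall back to a plain call when an older version rejects it with `TypeError`. A minimal stdlib sketch of the pattern (the `collect_fn` callables below are hypothetical stand-ins for `LazyFrame.collect`):

```python
def collect_compat(collect_fn, *, prefer_engine="streaming"):
    """Call ``collect_fn(engine=...)`` when the installed version
    accepts the keyword; otherwise fall back to a plain call.
    """
    try:
        return collect_fn(engine=prefer_engine)
    except TypeError:
        # Older versions raise TypeError for an unknown keyword.
        # Caveat: this also masks a TypeError raised *inside* the
        # call, which is why the real code keeps a broad fallback too.
        return collect_fn()

# A fake "new" API that accepts the keyword...
def new_collect(engine=None):
    return f"collected via {engine or 'in-memory'}"

# ...and a fake "old" API that does not.
def old_collect():
    return "collected via in-memory"

a = collect_compat(new_collect)  # uses the streaming keyword
b = collect_compat(old_collect)  # falls back cleanly
```

Probing with `try`/`except TypeError` keeps a single code path working across library versions without pinning or inspecting version strings.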
