Skip to content

Commit 08a3b7f

Browse files
authored
Merge pull request #509 from coding-kitties/feat/504-pipeline-risk-neutrality
feat(pipeline): #504 risk-neutrality primitives (sector neutrality, beta neutralisation, OLS risk models)
2 parents 127115d + c8e7cf4 commit 08a3b7f

8 files changed

Lines changed: 866 additions & 20 deletions

File tree

docusaurus/docs/Advanced Concepts/pipelines.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ A per-symbol time-series computation. Phase 1 ships these built-ins:
7171
| `SMA(window)` | close | simple moving average |
7272
| `RSI(window)` | close | Wilder's RSI |
7373
| `Volatility(window, periods_per_year=252)` | close | annualised stdev of log returns |
74+
| `StaticPerSymbol(mapping, default=None)` | — | broadcasts a `dict[symbol, value]` (e.g. sector / market-cap) into the cross-section |
75+
| `CrossSectionalMean(base, mask=None)` | base factor | per-bar equal-weight mean across surviving symbols |
76+
| `RollingBeta(target, market, window>=2)` | two factors | rolling-window OLS beta `cov(t,m)/var(m)`; null when `var(m) == 0` |
77+
| `Neutralize(target, exposures=[...], mask=None, add_intercept=True)` | factors | per-bar OLS residualisation of `target` against `exposures`; null on rank-deficient bars |
7478

7579
You can also subclass `CustomFactor` to compute your own.
7680

@@ -101,13 +105,72 @@ factor.zscore(mask=universe) # (x - mean) / std per bar
101105
factor.demean(mask=universe) # x - mean per bar
102106
factor.winsorize(0.01, 0.99, # clip to per-bar quantiles
103107
mask=universe)
108+
109+
# Group-relative variants — stats computed within each group
110+
# (typically sector). `groups` accepts a dict[symbol, key] or any
111+
# Factor that emits a per-symbol category.
112+
factor.zscore(groups=SECTORS) # z-score within sector
113+
factor.demean(groups=SECTORS, mask=universe)
104114
```
105115

106116
Where the cross-sectional `std` is `0` or undefined (e.g. only one
107117
symbol survives the mask), `zscore` returns `null` rather than
108118
`inf`/`NaN`. Masked-out symbols are excluded from the bar's
109119
statistic *and* from the bar's output.
110120

121+
### Risk neutrality
122+
123+
When you want a factor's signal to be independent of structural
124+
exposures (sector, beta to the market, multi-factor risk model),
125+
use the built-in risk-neutrality primitives. They cover three
126+
common cases:
127+
128+
**Sector neutrality** — z-score or demean *within* each sector
129+
instead of across the whole universe by passing `groups=`. The
130+
mapping can be a `dict[symbol, sector]` or any `Factor` that
131+
emits a per-symbol category:
132+
133+
```python
134+
SECTORS = {"AAPL": "Tech", "MSFT": "Tech", "JPM": "Fin"} # ...extend with the rest of your universe
135+
136+
class SectorNeutralMomentum(Pipeline):
137+
momentum = Returns(window=60)
138+
signal = momentum.zscore(groups=SECTORS) # z-score within sector
139+
```
140+
141+
**Beta neutralisation** — strip a factor's exposure to the market
142+
(or any other reference series) using `RollingBeta` and
143+
`Neutralize`:
144+
145+
```python
146+
from investing_algorithm_framework import (
147+
Pipeline, Returns, RollingBeta, CrossSectionalMean, Neutralize,
148+
)
149+
150+
class BetaNeutralAlpha(Pipeline):
151+
r = Returns(window=1)
152+
market = CrossSectionalMean(r) # equal-weight market
153+
beta = RollingBeta(r, market, window=60)
154+
alpha = Neutralize(r, exposures=[beta]) # market-neutral residual
155+
```
156+
157+
**Multi-factor risk model** — pass several exposures to
158+
`Neutralize` and the residual is orthogonal to all of them at
159+
each bar (per-bar OLS):
160+
161+
```python
162+
class FactorNeutralAlpha(Pipeline):
163+
r = Returns(window=1)
164+
size = StaticPerSymbol(MARKET_CAPS) # cross-sectional size
165+
val = BookToPrice() # user-defined CustomFactor (not a built-in)
166+
mom = Returns(window=252)
167+
residual = Neutralize(r, exposures=[size, val, mom])
168+
```
169+
170+
Bars where the system is rank-deficient (e.g. more exposures than
171+
surviving symbols) yield `null` residuals so they're skipped
172+
downstream rather than producing `NaN`.
173+
111174
### Factor algebra
112175

113176
Factors compose via the standard arithmetic operators. The framework

investing_algorithm_framework/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
FillModel, FullFill, VolumeBasedFill, \
3333
FXRateProvider, StaticFXRateProvider, \
3434
Pipeline, Factor, CustomFactor, Filter, \
35-
Returns, AverageDollarVolume, AverageTradedValue, SMA, RSI, Volatility
35+
Returns, AverageDollarVolume, AverageTradedValue, SMA, RSI, \
36+
Volatility, StaticPerSymbol, CrossSectionalMean, RollingBeta, \
37+
Neutralize
3638
from .infrastructure import AzureBlobStorageStateHandler, \
3739
CSVOHLCVDataProvider, CSVTickerDataProvider, CSVURLDataProvider, \
3840
JSONURLDataProvider, ParquetURLDataProvider, \
@@ -269,6 +271,10 @@
269271
"SMA",
270272
"RSI",
271273
"Volatility",
274+
"StaticPerSymbol",
275+
"CrossSectionalMean",
276+
"RollingBeta",
277+
"Neutralize",
272278
"load_ipython_extension",
273279
"get_cv_consistency",
274280
"get_normalized_stability",

investing_algorithm_framework/domain/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@
5757
SMA,
5858
RSI,
5959
Volatility,
60+
StaticPerSymbol,
61+
CrossSectionalMean,
62+
RollingBeta,
63+
Neutralize,
6064
)
6165

6266
__all__ = [
@@ -181,6 +185,10 @@
181185
"SMA",
182186
"RSI",
183187
"Volatility",
188+
"StaticPerSymbol",
189+
"CrossSectionalMean",
190+
"RollingBeta",
191+
"Neutralize",
184192
"Blotter",
185193
"DefaultBlotter",
186194
"SimulationBlotter",

investing_algorithm_framework/domain/pipeline/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,15 @@
1010
from .filter import Filter
1111
from .pipeline import Pipeline
1212
from .factors import (
13-
Returns,
1413
AverageDollarVolume,
1514
AverageTradedValue,
16-
SMA,
15+
CrossSectionalMean,
16+
Neutralize,
17+
Returns,
18+
RollingBeta,
1719
RSI,
20+
SMA,
21+
StaticPerSymbol,
1822
Volatility,
1923
)
2024

@@ -23,10 +27,14 @@
2327
"Factor",
2428
"CustomFactor",
2529
"Filter",
26-
"Returns",
2730
"AverageDollarVolume",
2831
"AverageTradedValue",
29-
"SMA",
32+
"CrossSectionalMean",
33+
"Neutralize",
34+
"Returns",
35+
"RollingBeta",
3036
"RSI",
37+
"SMA",
38+
"StaticPerSymbol",
3139
"Volatility",
3240
]

investing_algorithm_framework/domain/pipeline/factor.py

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,23 +127,44 @@ def bottom(self, n: int) -> "Filter":
127127
# ------------------------------------------------------------------ #
128128
# Cross-sectional transforms (Phase 2 / #502)
129129
# ------------------------------------------------------------------ #
130-
def zscore(self, mask: Optional["Filter"] = None) -> "Factor":
130+
def zscore(
131+
self,
132+
mask: Optional["Filter"] = None,
133+
groups=None,
134+
) -> "Factor":
131135
"""Cross-sectional z-score within each timestamp.
132136
133137
Returns ``(x - mean) / std`` computed over the symbols at each
134138
bar. With ``mask``, symbols outside the mask are excluded from
135139
the mean/std and receive ``null`` in the output.
140+
141+
``groups`` enables **group-relative** (e.g. sector-neutral)
142+
normalisation: the statistic is computed within each
143+
``(datetime, group)`` cell instead of across all symbols. It
144+
accepts:
145+
146+
- a ``dict[str, Any]`` mapping ``symbol`` → group label —
147+
internally wrapped in :class:`StaticPerSymbol`,
148+
- a :class:`Factor` returning a categorical value per row
149+
(e.g. a slow-moving fundamental bucket).
136150
"""
137-
return _Zscore(self, mask=mask)
151+
return _Zscore(self, mask=mask, groups=groups)
138152

139-
def demean(self, mask: Optional["Filter"] = None) -> "Factor":
153+
def demean(
154+
self,
155+
mask: Optional["Filter"] = None,
156+
groups=None,
157+
) -> "Factor":
140158
"""Cross-sectional mean removal within each timestamp.
141159
142160
Returns ``x - mean(x)`` computed over the symbols at each bar.
143161
With ``mask``, symbols outside the mask are excluded from the
144162
mean and receive ``null`` in the output.
163+
164+
``groups`` (same shape as in :meth:`zscore`) enables
165+
group-relative demeaning — e.g. sector neutrality.
145166
"""
146-
return _Demean(self, mask=mask)
167+
return _Demean(self, mask=mask, groups=groups)
147168

148169
def winsorize(
149170
self,
@@ -272,6 +293,28 @@ def _coerce_operand(operand) -> "Factor":
272293
)
273294

274295

296+
def _coerce_groups(groups) -> Optional["Factor"]:
297+
"""Normalise the ``groups`` argument of cross-sectional transforms
298+
into a :class:`Factor` (or ``None``).
299+
300+
Accepts ``None``, a ``dict[symbol, group]`` mapping (auto-wrapped
301+
in :class:`StaticPerSymbol`), or any pre-existing :class:`Factor`.
302+
"""
303+
if groups is None:
304+
return None
305+
if isinstance(groups, Factor):
306+
return groups
307+
if isinstance(groups, dict):
308+
# Local import to avoid an import cycle at module load: the
309+
# built-in factors module imports from this file.
310+
from .factors.builtin import StaticPerSymbol
311+
return StaticPerSymbol(groups)
312+
raise TypeError(
313+
f"Unsupported type for `groups`: {type(groups).__name__}. "
314+
f"Expected None, dict[str, Any], or Factor."
315+
)
316+
317+
275318
class _Constant(Factor):
276319
"""A panel-aligned constant series. Window is 1 (no warmup needed)."""
277320

@@ -371,22 +414,36 @@ class _CrossSectionalTransform(Factor):
371414
Polars expression for the (possibly mask-nulled) factor values and
372415
returns the transformed expression. The base class handles mask
373416
application and per-``datetime`` grouping.
417+
418+
When ``groups`` is provided, statistics are computed within each
419+
``(datetime, group)`` cell instead of across all symbols at a
420+
bar — enabling sector-neutral or otherwise group-relative
421+
transforms. ``groups`` may be a ``dict[symbol, group]`` (wrapped
422+
in :class:`StaticPerSymbol` automatically) or any :class:`Factor`
423+
returning a categorical value per row.
374424
"""
375425

376426
def __init__(
377427
self,
378428
base: Factor,
379429
mask: Optional["Filter"] = None,
430+
groups=None,
380431
) -> None:
381432
super().__init__(window=base.required_window())
382433
self._base = base
383434
self._mask = mask
435+
self._groups = _coerce_groups(groups)
384436
cols = list(base.required_columns())
385437
if mask is not None:
386438
for c in mask.required_columns():
387439
if c not in cols:
388440
cols.append(c)
389441
self.window = max(self.window, mask.required_window())
442+
if self._groups is not None:
443+
for c in self._groups.required_columns():
444+
if c not in cols:
445+
cols.append(c)
446+
self.window = max(self.window, self._groups.required_window())
390447
self.inputs = cols
391448

392449
def required_columns(self) -> List[str]:
@@ -398,6 +455,15 @@ def required_window(self) -> int:
398455
def _transform_expr(self) -> pl.Expr:
399456
raise NotImplementedError # pragma: no cover
400457

458+
def _group_keys(self) -> List[str]:
459+
"""Return the columns to group by for the cross-sectional
460+
statistic. ``["datetime"]`` for the standard case,
461+
``["datetime", "__group__"]`` when ``groups`` is set.
462+
"""
463+
if self._groups is None:
464+
return ["datetime"]
465+
return ["datetime", "__group__"]
466+
401467
def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
402468
values = self._base.evaluate(panel)
403469
df = panel.select(["datetime", "symbol"]).with_columns(
@@ -411,17 +477,21 @@ def compute_panel(self, panel: pl.DataFrame) -> pl.Series:
411477
.otherwise(None)
412478
.alias("__x__")
413479
)
480+
if self._groups is not None:
481+
group_values = self._groups.evaluate(panel)
482+
df = df.with_columns(group_values.alias("__group__"))
414483
df = df.with_columns(self._transform_expr().alias("__out__"))
415484
return df["__out__"]
416485

417486

418487
class _Zscore(_CrossSectionalTransform):
419-
"""Cross-sectional z-score per bar."""
488+
"""Cross-sectional z-score per bar (optionally per group)."""
420489

421490
def _transform_expr(self) -> pl.Expr:
422491
x = pl.col("__x__")
423-
mean = x.mean().over("datetime")
424-
std = x.std().over("datetime")
492+
keys = self._group_keys()
493+
mean = x.mean().over(keys)
494+
std = x.std().over(keys)
425495
# If std is 0 or null, returning null is the safe choice (it
426496
# signals "no dispersion" rather than producing inf/NaN that
427497
# poisons downstream rolling stats).
@@ -433,11 +503,12 @@ def _transform_expr(self) -> pl.Expr:
433503

434504

435505
class _Demean(_CrossSectionalTransform):
436-
"""Cross-sectional mean removal per bar."""
506+
"""Cross-sectional mean removal per bar (optionally per group)."""
437507

438508
def _transform_expr(self) -> pl.Expr:
439509
x = pl.col("__x__")
440-
return x - x.mean().over("datetime")
510+
keys = self._group_keys()
511+
return x - x.mean().over(keys)
441512

442513

443514
class _Winsorize(_CrossSectionalTransform):
Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,26 @@
1-
"""Built-in factors shipped with the Pipeline API (Phase 1)."""
1+
"""Built-in factors shipped with the Pipeline API."""
22
from .builtin import (
3-
Returns,
43
AverageDollarVolume,
54
AverageTradedValue,
6-
SMA,
5+
CrossSectionalMean,
6+
Neutralize,
7+
Returns,
8+
RollingBeta,
79
RSI,
10+
SMA,
11+
StaticPerSymbol,
812
Volatility,
913
)
1014

1115
__all__ = [
12-
"Returns",
1316
"AverageDollarVolume",
1417
"AverageTradedValue",
15-
"SMA",
18+
"CrossSectionalMean",
19+
"Neutralize",
20+
"Returns",
21+
"RollingBeta",
1622
"RSI",
23+
"SMA",
24+
"StaticPerSymbol",
1725
"Volatility",
1826
]

0 commit comments

Comments
 (0)