From 3ba8fa9163cc562881a1c864ac6e21f5203cb315 Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Mon, 23 Mar 2026 17:03:53 +0800
Subject: [PATCH 01/10] docs: update README operator table, fix CLAUDE.md
 outdated sections

README.md:
- Added PyPI version badge and CI status badge
- Complete operator table: added all missing operators across all categories
  (was missing ~20 operators including ts_regression, trade_when, winsorize,
  truncate, group_mean, group_median, group_backfill, all base ops)
- Removed non-existent 'ln' from math operators
- Added interval parameter example in Quick Start

CLAUDE.md:
- Section 7: updated code review rules to reflect single-developer mode
- Section 9: replaced hardcoded v0.2.0 with generic vX.Y.Z placeholders
- Section 10: marked infrastructure setup as already configured

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md | 49 ++++++++++++++++++-------------------------------
 README.md | 53 +++++++++++++++++++++++++++--------------------------
 2 files changed, 45 insertions(+), 57 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index aeaa1af..600ad7e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -178,7 +178,8 @@ negative values. Previously returned null, now returns correct product.
 
 ## 7. Code Review Rules
 
-- All PRs require at least one reviewer approval before merge
+- When the team has multiple developers, enable "Require approvals" in branch protection
+- Currently (single-developer mode): CI status checks are required, review approval is optional
 - Reviewer must verify:
   1. Tests pass and cover the change
   2. Numerical correctness (manually verify at least one expected value)
@@ -220,26 +221,26 @@ pytest tests/ -v
 ruff check elvers/
 
 # 2. Update version number (single source of truth)
-#    Edit elvers/__init__.py: __version__ = "0.2.0"
+#    Edit elvers/__init__.py: __version__ = "X.Y.Z"
 
 # 3. Update CHANGELOG.md
-#    Move items from [Unreleased] to [0.2.0] - YYYY-MM-DD
+#    Move items from [Unreleased] to [X.Y.Z] - YYYY-MM-DD
 
 # 4. Commit the release
 git add elvers/__init__.py CHANGELOG.md
-git commit -m "release: v0.2.0"
+git commit -m "release: vX.Y.Z"
 git push origin dev
 
 # 5. Create PR: dev -> main on GitHub
-#    Title: "release: v0.2.0"
+#    Title: "release: vX.Y.Z"
 #    Wait for CI to pass and review approval
 #    Squash merge on GitHub
 
 # 6. Tag on main (after PR merged)
 git checkout main
 git pull origin main
-git tag v0.2.0
-git push origin v0.2.0
+git tag vX.Y.Z
+git push origin vX.Y.Z
 
 # 7. Automated (triggered by tag push):
 #    - CI runs full test suite again
@@ -251,14 +252,14 @@ git push origin v0.2.0
 
 ### What Happens Automatically
 
-When you push a tag like `v0.2.0`:
+When you push a tag like `vX.Y.Z`:
 
 1. `.github/workflows/publish.yml` triggers
 2. Runs full test suite on Python 3.10-3.13 (safety net)
 3. If tests pass: builds package, publishes to PyPI
 4. Creates a GitHub Release page at github.com/quantbai/elvers/releases
    with auto-generated release notes from commit messages
-5. Users can now `pip install elvers==0.2.0`
+5. Users can now install that specific version with `pip install elvers==X.Y.Z`
 
 ### What You See on GitHub After Release
 
@@ -268,29 +269,15 @@ When you push a tag like `v0.2.0`:
 
 ---
 
-## 10. One-Time Setup (for repository admin)
+## 10. Setup
 
-### PyPI Trusted Publisher (required for automated publishing)
+### Infrastructure (already configured)
 
-1. Go to https://pypi.org -> Your projects -> elvers -> Publishing
-2. Add a new publisher:
-   - Owner: quantbai
-   - Repository: elvers
-   - Workflow name: publish.yml
-   - Environment: (leave blank)
+- PyPI Trusted Publisher: configured for quantbai/elvers -> publish.yml
+- GitHub Branch Protection on main: require PR, require CI status checks
+- GitHub Actions: ci.yml (push/PR) + publish.yml (tag-triggered release)
 
-### GitHub Branch Protection (strongly recommended)
-
-1. GitHub repo -> Settings -> Branches -> Add rule
-2. Branch name pattern: `main`
-3. Enable:
-   - "Require a pull request before merging"
-   - "Require approvals" (1 minimum)
-   - "Require status checks to pass before merging"
-   - Select required status check: "test"
-4. Save changes
-
-### Local Development Setup (every developer)
+### Local Development Setup (every new developer)
 
 ```bash
 git clone https://github.com/quantbai/elvers.git
@@ -327,8 +314,8 @@ git log --oneline -10            # Recent history
 
 # === Release ===
 python -m build                  # Build package locally (for testing)
-git tag v0.2.0                   # Create version tag
-git push origin v0.2.0           # Push tag (triggers publish)
+git tag vX.Y.Z                   # Create version tag
+git push origin vX.Y.Z           # Push tag (triggers publish)
 ```
 
 ---
diff --git a/README.md b/README.md
index 7d4a05a..d27c280 100644
--- a/README.md
+++ b/README.md
@@ -2,24 +2,25 @@
 
 <img src="https://raw.githubusercontent.com/quantbai/elvers/main/assets/elvers.svg" alt="Elvers" width="500">
 
+[![PyPI](https://img.shields.io/pypi/v/elvers.svg)](https://pypi.org/project/elvers/)
+[![CI](https://github.com/quantbai/elvers/actions/workflows/ci.yml/badge.svg)](https://github.com/quantbai/elvers/actions/workflows/ci.yml)
 [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
 [![Polars](https://img.shields.io/badge/Polars-1.37+-CD853F.svg)](https://pola.rs/)
 
 </div>
 
-**ELVERS** is a high-performance, strictly typed multi-factor alpha research engine powered by [Polars](https://pola.rs/). 
+**ELVERS** is a high-performance, strictly typed multi-factor alpha research engine powered by [Polars](https://pola.rs/).
 
 
 ## Design Philosophy
 
 Quantitative research requires rapid iteration over large universe panels without compromising execution speed. Legacy pandas-based pipelines are interpretable but inherently scale poorly. elvers addresses this through a robust two-layer abstraction:
 
-- **`Panel`** — A continuous, balanced panel container enforcing strict `(timestamp, symbol)` alignment. It mitigates look-ahead bias and standardizes index integrity across all transformations.
-- **`Factor`** — A fully evaluated, eagerly executed vector of signal exposures. Bound directly to the global panel, factors resolve native Polars expressions instantaneously utilizing highly-parallelized core routines underneath Rust and C. 
-
-The architecture guarantees that complex computational graphs—from primitive time-series aggregations to complex cross-sectional neutralizations—are resolved at theoretical hardware peaks with virtually zero Python interpreter overhead in the hot path.
+- **`Panel`** -- A continuous, balanced panel container enforcing strict `(timestamp, symbol)` alignment. It mitigates look-ahead bias and standardizes index integrity across all transformations.
+- **`Factor`** -- A fully evaluated, eagerly executed vector of signal exposures. Bound directly to the global panel, factors resolve native Polars expressions instantaneously utilizing highly-parallelized core routines underneath Rust and C.
 
+The architecture guarantees that complex computational graphs--from primitive time-series aggregations to complex cross-sectional neutralizations--are resolved at theoretical hardware peaks with virtually zero Python interpreter overhead in the hot path.
 
 
 ## Installation
@@ -29,8 +30,8 @@ pip install elvers
 ```
 
 
-
 ## Quick Start
+
 Compose factors exactly as intuitively as they are expressed mathematically:
 
 ```python
@@ -38,6 +39,7 @@ from elvers import load, ts_rank, zscore
 
 # Load your own data
 # panel = load("your_ohlcv.csv")
+# panel = load("hourly_data.parquet", interval="1h")
 
 # Load built-in sample dataset
 panel = load()
@@ -56,20 +58,20 @@ print(result)
 
 Both `Panel` and `Factor` expose a `.df` property that returns the underlying `pl.DataFrame`:
 
-- **`panel.df`** — Full panel frame with all OHLCV columns intact.
-- **`factor.df`** — Flat `(T_days * N_symbols, 3)` frame aligned to the original spatial coordinates:
+- **`panel.df`** -- Full panel frame with all OHLCV columns intact.
+- **`factor.df`** -- Flat `(T * N, 3)` frame aligned to the original spatial coordinates:
 
 ```text
-shape: (T_days * N_symbols, 3)
-┌────────────┬────────┬───────────┐
-│ timestamp  ┆ symbol ┆ factor    │
-│ ---        ┆ ---    ┆ ---       │
-│ date       ┆ str    ┆ f64       │
-╞════════════╪════════╪═══════════╡
-│ 2024-01-01 ┆ BTC    ┆ null      │
-│ ...        ┆ ...    ┆ ...       │
-│ 2024-12-31 ┆ ETH    ┆ 1.243     │
-└────────────┴────────┴───────────┘
+shape: (T * N, 3)
++------------+--------+-----------+
+| timestamp  | symbol | factor    |
+| ---        | ---    | ---       |
+| date       | str    | f64       |
++============+========+===========+
+| 2024-01-01 | BTC    | null      |
+| ...        | ...    | ...       |
+| 2024-12-31 | ETH    | 1.243     |
++------------+--------+-----------+
 ```
 
 Rows are ordered by `timestamp` (ascending), then `symbol` (ascending).
@@ -79,11 +81,10 @@ Rows are ordered by `timestamp` (ascending), then `symbol` (ascending).
 
 ## Operator Library
 
-| Category            | Supported Operators                                                                                                                                                                                                                                                                                                                        |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| **Time-Series**     | `ts_delay`, `ts_delta`, `ts_mean`, `ts_sum`, `ts_std_dev`, `ts_min`, `ts_max`, `ts_median`, `ts_rank`, `ts_skewness`, `ts_kurtosis`, `ts_zscore`, `ts_corr`, `ts_covariance`, `ts_product`, `ts_decay_linear`, `ts_av_diff`, `ts_scale`, `ts_quantile`, `ts_cv`, `ts_autocorr`, `ts_count_nans`, `ts_backfill` |
-| **Cross-Sectional** | `rank`, `zscore`, `mean`, `median`, `scale`, `normalize`, `signal`                                                                                                                                                                                                                                                             |
-| **Neutralization**  | `vector_neut`, `regression_neut`, `group_neutralize`, `group_rank`, `group_zscore`, `group_scale`, `group_normalize`                                                                                                                                                                                                                       |
-| **Math**            | `log`, `ln`, `sqrt`, `sign`, `power`, `signed_power`, `inverse`, `s_log_1p`, `maximum`, `minimum`, `where`, standard operators (`+`, `-`, `*`, `/`, `**`, `abs`)                                                                                                                                                                           |
-
-
+| Category | Operators |
+| --- | --- |
+| **Base** | `add`, `subtract`, `multiply`, `divide`, `reverse`, `densify`, `bucket` |
+| **Time-Series** | `ts_delay`, `ts_delta`, `ts_mean`, `ts_sum`, `ts_std_dev`, `ts_min`, `ts_max`, `ts_median`, `ts_rank`, `ts_skewness`, `ts_kurtosis`, `ts_zscore`, `ts_corr`, `ts_covariance`, `ts_product`, `ts_step`, `ts_decay_linear`, `ts_decay_exp_window`, `days_from_last_change`, `ts_av_diff`, `ts_scale`, `ts_percentile`, `ts_quantile`, `ts_cv`, `ts_autocorr`, `ts_count_nans`, `ts_backfill`, `kth_element`, `last_diff_value`, `inst_tvr`, `ts_delta_limit`, `ts_regression`, `trade_when` |
+| **Cross-Sectional** | `rank`, `zscore`, `mean`, `median`, `scale`, `normalize`, `quantile`, `signal`, `winsorize`, `truncate`, `left_tail`, `right_tail` |
+| **Neutralization** | `vector_neut`, `regression_neut`, `group_neutralize`, `group_rank`, `group_zscore`, `group_scale`, `group_normalize`, `group_mean`, `group_median`, `group_backfill` |
+| **Math** | `log`, `sqrt`, `sign`, `power`, `signed_power`, `inverse`, `s_log_1p`, `maximum`, `minimum`, `where`, standard operators (`+`, `-`, `*`, `/`, `**`, `abs`) |

From 11b879db93a81a32adc1359261af1d2eda7280ab Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Mon, 23 Mar 2026 17:25:18 +0800
Subject: [PATCH 02/10] docs: README rewrite for production-grade presentation

- Replaced marketing language with factual technical statements
- Added Numerical Conventions table documenting all design decisions:
  rank range, ddof conventions, rolling warmup, zero-guard threshold,
  negative product handling, null/NaN/Inf unification
- Usage example now shows realistic alpha pipeline (momentum,
  vol-adjustment, regression residual, sector neutralization)
- Operator descriptions include statistical conventions inline

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 README.md | 102 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 44 deletions(-)

diff --git a/README.md b/README.md
index d27c280..0bcb253 100644
--- a/README.md
+++ b/README.md
@@ -10,18 +10,27 @@
 
 </div>
 
-**ELVERS** is a high-performance, strictly typed multi-factor alpha research engine powered by [Polars](https://pola.rs/).
+Polars-native factor computation engine for quantitative research.
 
+All operators compile to Rust-backed Polars expressions -- no Python loops in the hot path.
 
-## Design Philosophy
+## Core Abstractions
 
-Quantitative research requires rapid iteration over large universe panels without compromising execution speed. Legacy pandas-based pipelines are interpretable but inherently scale poorly. elvers addresses this through a robust two-layer abstraction:
+- **`Panel`** -- Balanced `(timestamp, symbol)` container with strict alignment guarantees. Prevents look-ahead bias by construction.
+- **`Factor`** -- Immutable signal vector. Every operator takes `Factor` and returns `Factor` with eager evaluation. Null propagation follows explicit rules; all division operations are zero-guarded.
 
-- **`Panel`** -- A continuous, balanced panel container enforcing strict `(timestamp, symbol)` alignment. It mitigates look-ahead bias and standardizes index integrity across all transformations.
-- **`Factor`** -- A fully evaluated, eagerly executed vector of signal exposures. Bound directly to the global panel, factors resolve native Polars expressions instantaneously utilizing highly-parallelized core routines underneath Rust and C.
-
-The architecture guarantees that complex computational graphs--from primitive time-series aggregations to complex cross-sectional neutralizations--are resolved at theoretical hardware peaks with virtually zero Python interpreter overhead in the hot path.
+## Numerical Conventions
 
+| Topic | Convention |
+| --- | --- |
+| **Rank range** | `(0, 1]` -- minimum is `1/n`, maximum is `1.0`. Does not pass through zero. Ties use `average` method. Null values are excluded from ranking, not assigned a rank. |
+| **Standard deviation** | Population (ddof=0) for all `std`, `variance`, `zscore`, `normalize` operators. |
+| **Correlation / Covariance** | Sample (ddof=1) for `ts_corr`, `ts_covariance`, `ts_autocorr`. Consistent: `corr(x,y) = cov(x,y) / (std(x) * std(y))` holds when using the same ddof. |
+| **Rolling warmup** | All `ts_*` operators use `min_samples=window`. The first `window-1` values per symbol are null. |
+| **Division by zero** | All divisions are guarded at `abs(denominator) < 1e-10`, returning null. This applies to `divide`, `inverse`, `zscore`, `ts_zscore`, `ts_cv`, `ts_regression`, and all neutralization operators. |
+| **Negative products** | `ts_product` handles negative values via sign-magnitude decomposition: counts negatives in window for sign, computes `exp(sum(log(abs(x))))` for magnitude. Zero in window produces zero. |
+| **Null propagation** | Nulls propagate naturally through all operations. Boundary cases (constant window, insufficient data, zero denominator) return null explicitly -- never through implicit Inf-to-null conversion. |
+| **NaN / Inf / null** | Polars distinguishes NaN (IEEE 754) from null (missing). Elvers unifies them: the Factor constructor converts both NaN and Inf to null on creation. The entire library operates on a single missing-value semantic (null only), eliminating NaN-propagation bugs. No operator produces NaN or Inf as a valid result. |
 
 ## Installation
 
@@ -29,38 +38,23 @@ The architecture guarantees that complex computational graphs--from primitive ti
 pip install elvers
 ```
 
-
-## Quick Start
-
-Compose factors exactly as intuitively as they are expressed mathematically:
+## Usage
 
 ```python
-from elvers import load, ts_rank, zscore
-
-# Load your own data
-# panel = load("your_ohlcv.csv")
-# panel = load("hourly_data.parquet", interval="1h")
+from elvers import load, ts_rank, ts_regression, zscore, signal, group_neutralize
 
-# Load built-in sample dataset
-panel = load()
+panel = load("ohlcv.parquet")           # or load() for built-in sample data
+close, volume = panel["close"], panel["volume"]
 
-close = panel["close"]
-volume = panel["volume"]
+# Compose arbitrarily -- each expression evaluates immediately
+momentum    = ts_rank(close, 20)
+vol_adj     = zscore(momentum) / zscore(ts_rank(volume, 20))
+beta_resid  = ts_regression(close, volume, window=60, rettype=0)
+alpha       = signal(group_neutralize(vol_adj, panel["sector"]))
 
-# Define and execute expressions instantly
-momentum = ts_rank(close, 20)
-alpha = zscore(momentum)
-
-# Extract native Polars DataFrame
-result = alpha.df
-print(result)
+print(alpha.df)
 ```
 
-Both `Panel` and `Factor` expose a `.df` property that returns the underlying `pl.DataFrame`:
-
-- **`panel.df`** -- Full panel frame with all OHLCV columns intact.
-- **`factor.df`** -- Flat `(T * N, 3)` frame aligned to the original spatial coordinates:
-
 ```text
 shape: (T * N, 3)
 +------------+--------+-----------+
@@ -68,23 +62,43 @@ shape: (T * N, 3)
 | ---        | ---    | ---       |
 | date       | str    | f64       |
 +============+========+===========+
-| 2024-01-01 | BTC    | null      |
+| 2024-01-01 | BTC    | -0.167    |
+| 2024-01-01 | ETH    |  0.333    |
 | ...        | ...    | ...       |
-| 2024-12-31 | ETH    | 1.243     |
 +------------+--------+-----------+
 ```
 
-Rows are ordered by `timestamp` (ascending), then `symbol` (ascending).
+Sub-daily data is supported via the `interval` parameter:
+
+```python
+panel = load("hourly.parquet", interval="1h")
+```
+
+## Operators
 
-> **Note**: Rolling window operators naturally yield `null` for the initial `window - 1` periods per symbol. The full dense panel shape is preserved throughout all operations.
+70+ operators across five categories. All return `Factor`.
 
+**Time-Series** -- rolling window per symbol (`min_samples=window`, population std ddof=0, sample corr/cov ddof=1):
+`ts_delay`, `ts_delta`, `ts_mean`, `ts_sum`, `ts_std_dev`, `ts_min`, `ts_max`, `ts_median`, `ts_rank`, `ts_skewness`, `ts_kurtosis`, `ts_zscore`, `ts_corr`, `ts_covariance`, `ts_product`, `ts_step`, `ts_decay_linear`, `ts_decay_exp_window`, `days_from_last_change`, `ts_av_diff`, `ts_scale`, `ts_percentile`, `ts_quantile`, `ts_cv`, `ts_autocorr`, `ts_count_nans`, `ts_backfill`, `kth_element`, `last_diff_value`, `inst_tvr`, `ts_delta_limit`, `ts_regression`, `trade_when`
 
-## Operator Library
+**Cross-Sectional** -- across symbols at each timestamp:
+`rank`, `zscore`, `mean`, `median`, `scale`, `normalize`, `quantile`, `signal`, `winsorize`, `truncate`, `left_tail`, `right_tail`
 
-| Category | Operators |
-| --- | --- |
-| **Base** | `add`, `subtract`, `multiply`, `divide`, `reverse`, `densify`, `bucket` |
-| **Time-Series** | `ts_delay`, `ts_delta`, `ts_mean`, `ts_sum`, `ts_std_dev`, `ts_min`, `ts_max`, `ts_median`, `ts_rank`, `ts_skewness`, `ts_kurtosis`, `ts_zscore`, `ts_corr`, `ts_covariance`, `ts_product`, `ts_step`, `ts_decay_linear`, `ts_decay_exp_window`, `days_from_last_change`, `ts_av_diff`, `ts_scale`, `ts_percentile`, `ts_quantile`, `ts_cv`, `ts_autocorr`, `ts_count_nans`, `ts_backfill`, `kth_element`, `last_diff_value`, `inst_tvr`, `ts_delta_limit`, `ts_regression`, `trade_when` |
-| **Cross-Sectional** | `rank`, `zscore`, `mean`, `median`, `scale`, `normalize`, `quantile`, `signal`, `winsorize`, `truncate`, `left_tail`, `right_tail` |
-| **Neutralization** | `vector_neut`, `regression_neut`, `group_neutralize`, `group_rank`, `group_zscore`, `group_scale`, `group_normalize`, `group_mean`, `group_median`, `group_backfill` |
-| **Math** | `log`, `sqrt`, `sign`, `power`, `signed_power`, `inverse`, `s_log_1p`, `maximum`, `minimum`, `where`, standard operators (`+`, `-`, `*`, `/`, `**`, `abs`) |
+**Neutralization and Group** -- factor-driven grouping for sector/industry neutralization:
+`vector_neut`, `regression_neut`, `group_neutralize`, `group_rank`, `group_zscore`, `group_scale`, `group_normalize`, `group_mean`, `group_median`, `group_backfill`
+
+**Math**:
+`log`, `sqrt`, `sign`, `power`, `signed_power`, `inverse`, `s_log_1p`, `maximum`, `minimum`, `where`
+
+**Arithmetic**:
+`add`, `subtract`, `multiply`, `divide`, `reverse`, `densify`, `bucket`, and standard operators (`+`, `-`, `*`, `/`, `**`, `abs`)
+
+## Development
+
+```bash
+pip install -e ".[dev]"
+pytest tests/ -v
+ruff check elvers/
+```
+
+See [CLAUDE.md](CLAUDE.md) for full development standards.

From 15b36d62a1cafe68a829273eddef7af8b93b1782 Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Mon, 23 Mar 2026 17:27:09 +0800
Subject: [PATCH 03/10] docs: add null arithmetic convention to README

Document the dual-mode null handling in arithmetic operators:
default propagates null (5.0 + null = null), filter=True treats
null as identity element (0 for add/subtract, 1 for multiply).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 0bcb253..4f09224 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ All operators compile to Rust-backed Polars expressions -- no Python loops in th
 | **Rolling warmup** | All `ts_*` operators use `min_samples=window`. The first `window-1` values per symbol are null. |
 | **Division by zero** | All divisions are guarded at `abs(denominator) < 1e-10`, returning null. This applies to `divide`, `inverse`, `zscore`, `ts_zscore`, `ts_cv`, `ts_regression`, and all neutralization operators. |
 | **Negative products** | `ts_product` handles negative values via sign-magnitude decomposition: counts negatives in window for sign, computes `exp(sum(log(abs(x))))` for magnitude. Zero in window produces zero. |
+| **Null in arithmetic** | Default: null propagates (`5.0 + null = null`). The `add`, `subtract`, `multiply` operators accept `filter=True` to treat null as the identity element (0 for add/subtract, 1 for multiply), so `add(a, b, filter=True)` yields `5.0 + null = 5.0`. Direct `+`/`-`/`*` operators always propagate null. |
 | **Null propagation** | Nulls propagate naturally through all operations. Boundary cases (constant window, insufficient data, zero denominator) return null explicitly -- never through implicit Inf-to-null conversion. |
 | **NaN / Inf / null** | Polars distinguishes NaN (IEEE 754) from null (missing). Elvers unifies them: the Factor constructor converts both NaN and Inf to null on creation. The entire library operates on a single missing-value semantic (null only), eliminating NaN-propagation bugs. No operator produces NaN or Inf as a valid result. |
 

From ed68fdc1288cc0525bc47be1188681a0b13f059f Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Mon, 23 Mar 2026 17:29:27 +0800
Subject: [PATCH 04/10] docs: tighten README, remove redundancy

- Merged three overlapping null-handling entries into two
- Removed implementation details from ts_product convention
- Fixed "compile to" -> "execute as" (more precise)
- Removed redundant code comments and output block from Usage
- Operators listed with space-separated formatting for readability

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 README.md | 60 +++++++++++++++++++++----------------------------------
 1 file changed, 23 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 4f09224..f8bb3ff 100644
--- a/README.md
+++ b/README.md
@@ -10,28 +10,25 @@
 
 </div>
 
-Polars-native factor computation engine for quantitative research.
-
-All operators compile to Rust-backed Polars expressions -- no Python loops in the hot path.
+Polars-native factor computation engine for quantitative research. All operators execute as Rust-backed Polars expressions with no Python loops in the hot path.
 
 ## Core Abstractions
 
 - **`Panel`** -- Balanced `(timestamp, symbol)` container with strict alignment guarantees. Prevents look-ahead bias by construction.
-- **`Factor`** -- Immutable signal vector. Every operator takes `Factor` and returns `Factor` with eager evaluation. Null propagation follows explicit rules; all division operations are zero-guarded.
+- **`Factor`** -- Immutable signal vector. Every operator takes `Factor` and returns `Factor` with eager evaluation.
 
 ## Numerical Conventions
 
 | Topic | Convention |
 | --- | --- |
-| **Rank range** | `(0, 1]` -- minimum is `1/n`, maximum is `1.0`. Does not pass through zero. Ties use `average` method. Null values are excluded from ranking, not assigned a rank. |
-| **Standard deviation** | Population (ddof=0) for all `std`, `variance`, `zscore`, `normalize` operators. |
-| **Correlation / Covariance** | Sample (ddof=1) for `ts_corr`, `ts_covariance`, `ts_autocorr`. Consistent: `corr(x,y) = cov(x,y) / (std(x) * std(y))` holds when using the same ddof. |
-| **Rolling warmup** | All `ts_*` operators use `min_samples=window`. The first `window-1` values per symbol are null. |
-| **Division by zero** | All divisions are guarded at `abs(denominator) < 1e-10`, returning null. This applies to `divide`, `inverse`, `zscore`, `ts_zscore`, `ts_cv`, `ts_regression`, and all neutralization operators. |
-| **Negative products** | `ts_product` handles negative values via sign-magnitude decomposition: counts negatives in window for sign, computes `exp(sum(log(abs(x))))` for magnitude. Zero in window produces zero. |
-| **Null in arithmetic** | Default: null propagates (`5.0 + null = null`). The `add`, `subtract`, `multiply` operators accept `filter=True` to treat null as the identity element (0 for add/subtract, 1 for multiply), so `add(a, b, filter=True)` yields `5.0 + null = 5.0`. Direct `+`/`-`/`*` operators always propagate null. |
-| **Null propagation** | Nulls propagate naturally through all operations. Boundary cases (constant window, insufficient data, zero denominator) return null explicitly -- never through implicit Inf-to-null conversion. |
-| **NaN / Inf / null** | Polars distinguishes NaN (IEEE 754) from null (missing). Elvers unifies them: the Factor constructor converts both NaN and Inf to null on creation. The entire library operates on a single missing-value semantic (null only), eliminating NaN-propagation bugs. No operator produces NaN or Inf as a valid result. |
+| **Missing values** | NaN and Inf are converted to null on Factor creation. The library operates on a single missing-value semantic (null only). Nulls propagate through all operations; boundary cases (constant window, insufficient data, zero denominator) return null explicitly. |
+| **Null in arithmetic** | Default: `5.0 + null = null`. The `add`, `subtract`, `multiply` functions accept `filter=True` to treat null as the identity element (0 for +/-, 1 for *). |
+| **Division by zero** | Every division returns null when `abs(denominator) < 1e-10`. This applies uniformly to `divide`, `inverse`, `zscore`, `ts_zscore`, `ts_cv`, `ts_regression`, and all neutralization operators. |
+| **Rank** | Range `(0, 1]`. Does not pass through zero. Ties use `average` method. Nulls excluded from ranking. |
+| **Standard deviation** | Population (ddof=0) for `std`, `variance`, `zscore`, `normalize`. |
+| **Correlation / Covariance** | Sample (ddof=1) for `ts_corr`, `ts_covariance`, `ts_autocorr`. The identity `corr(x,y) = cov(x,y) / (std(x) * std(y))` holds when `std` is computed with the same ddof as `cov`. |
+| **Rolling warmup** | All `ts_*` operators use `min_samples=window`, so the first `window-1` values per symbol are null. |
+| **ts_product** | Negative values produce a correctly signed product; a zero anywhere in the window produces zero. |
 
 ## Installation
 
@@ -47,26 +44,10 @@ from elvers import load, ts_rank, ts_regression, zscore, signal, group_neutraliz
 panel = load("ohlcv.parquet")           # or load() for built-in sample data
 close, volume = panel["close"], panel["volume"]
 
-# Compose arbitrarily -- each expression evaluates immediately
 momentum    = ts_rank(close, 20)
 vol_adj     = zscore(momentum) / zscore(ts_rank(volume, 20))
 beta_resid  = ts_regression(close, volume, window=60, rettype=0)
 alpha       = signal(group_neutralize(vol_adj, panel["sector"]))
-
-print(alpha.df)
-```
-
-```text
-shape: (T * N, 3)
-+------------+--------+-----------+
-| timestamp  | symbol | factor    |
-| ---        | ---    | ---       |
-| date       | str    | f64       |
-+============+========+===========+
-| 2024-01-01 | BTC    | -0.167    |
-| 2024-01-01 | ETH    |  0.333    |
-| ...        | ...    | ...       |
-+------------+--------+-----------+
 ```
 
 Sub-daily data is supported via the `interval` parameter:
@@ -77,22 +58,27 @@ panel = load("hourly.parquet", interval="1h")
 
 ## Operators
 
-70+ operators across five categories. All return `Factor`.
+70+ operators. All accept and return `Factor`.
+
+**Time-Series** -- rolling window per symbol:
 
-**Time-Series** -- rolling window per symbol (`min_samples=window`, population std ddof=0, sample corr/cov ddof=1):
-`ts_delay`, `ts_delta`, `ts_mean`, `ts_sum`, `ts_std_dev`, `ts_min`, `ts_max`, `ts_median`, `ts_rank`, `ts_skewness`, `ts_kurtosis`, `ts_zscore`, `ts_corr`, `ts_covariance`, `ts_product`, `ts_step`, `ts_decay_linear`, `ts_decay_exp_window`, `days_from_last_change`, `ts_av_diff`, `ts_scale`, `ts_percentile`, `ts_quantile`, `ts_cv`, `ts_autocorr`, `ts_count_nans`, `ts_backfill`, `kth_element`, `last_diff_value`, `inst_tvr`, `ts_delta_limit`, `ts_regression`, `trade_when`
+`ts_delay` `ts_delta` `ts_mean` `ts_sum` `ts_std_dev` `ts_min` `ts_max` `ts_median` `ts_rank` `ts_skewness` `ts_kurtosis` `ts_zscore` `ts_corr` `ts_covariance` `ts_product` `ts_step` `ts_decay_linear` `ts_decay_exp_window` `days_from_last_change` `ts_av_diff` `ts_scale` `ts_percentile` `ts_quantile` `ts_cv` `ts_autocorr` `ts_count_nans` `ts_backfill` `kth_element` `last_diff_value` `inst_tvr` `ts_delta_limit` `ts_regression` `trade_when`
 
 **Cross-Sectional** -- across symbols at each timestamp:
-`rank`, `zscore`, `mean`, `median`, `scale`, `normalize`, `quantile`, `signal`, `winsorize`, `truncate`, `left_tail`, `right_tail`
 
-**Neutralization and Group** -- factor-driven grouping for sector/industry neutralization:
-`vector_neut`, `regression_neut`, `group_neutralize`, `group_rank`, `group_zscore`, `group_scale`, `group_normalize`, `group_mean`, `group_median`, `group_backfill`
+`rank` `zscore` `mean` `median` `scale` `normalize` `quantile` `signal` `winsorize` `truncate` `left_tail` `right_tail`
+
+**Neutralization and Group** -- sector/industry neutralization:
+
+`vector_neut` `regression_neut` `group_neutralize` `group_rank` `group_zscore` `group_scale` `group_normalize` `group_mean` `group_median` `group_backfill`
 
 **Math**:
-`log`, `sqrt`, `sign`, `power`, `signed_power`, `inverse`, `s_log_1p`, `maximum`, `minimum`, `where`
+
+`log` `sqrt` `sign` `power` `signed_power` `inverse` `s_log_1p` `maximum` `minimum` `where`
 
 **Arithmetic**:
-`add`, `subtract`, `multiply`, `divide`, `reverse`, `densify`, `bucket`, and standard operators (`+`, `-`, `*`, `/`, `**`, `abs`)
+
+`add` `subtract` `multiply` `divide` `reverse` `densify` `bucket` and standard operators (`+` `-` `*` `/` `**` `abs`)
 
 ## Development
 

From b37174c381064ae2f99035d68fd9bea2bd779122 Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Tue, 24 Mar 2026 16:21:00 +0800
Subject: [PATCH 05/10] docs: add OPERATORS.md with full operator specification

Complete reference for all 79 operators including:
- Global numerical conventions (null semantics, zero guards, ddof, rank range)
- Per-operator documentation (signature, math, null behavior, warmup)
- Explicit comparison notes vs WorldQuant where elvers diverges
  (rank range (0,1] vs [0,1], ts_product negative handling, null vs NaN)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 OPERATORS.md | 652 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 652 insertions(+)
 create mode 100644 OPERATORS.md

diff --git a/OPERATORS.md b/OPERATORS.md
new file mode 100644
index 0000000..a8f50f1
--- /dev/null
+++ b/OPERATORS.md
@@ -0,0 +1,652 @@
+# Elvers Operator Specification
+
+Complete reference for all operators. Each entry documents signature, mathematical
+definition, null/edge-case behavior, and design rationale where elvers diverges
+from common platforms.
+
+---
+
+## Global Numerical Conventions
+
+These conventions apply uniformly across every operator in the library.
+
+### Missing-Value Semantics
+
+elvers operates on a **single missing-value type: Polars null**.
+
+On Factor creation, IEEE 754 `NaN` and `Inf` are converted to `null`.
+This eliminates the NaN-infection problem (`NaN + 1 = NaN`) that silently
+corrupts downstream computations in libraries that distinguish NaN from null.
+
+Once inside elvers, all missing data is null. Operators propagate null naturally
+through Polars expressions; boundary cases (constant window, insufficient data,
+zero denominator) return null explicitly rather than producing NaN or Inf.
+
+### Null Arithmetic
+
+By default, any operation involving null produces null:
+
+```
+5.0 + null = null
+5.0 * null = null
+```
+
+The `add`, `subtract`, and `multiply` operators accept a `filter` parameter.
+When `filter=True`, null is replaced with the identity element before the
+operation:
+
+| Operator | Identity | filter=True behavior |
+| --- | --- | --- |
+| add / subtract | 0 | null treated as 0 |
+| multiply | 1 | null treated as 1 |
+
+> **vs WQ:** WorldQuant uses `NaN` terminology and `filter=false` default.
+> elvers is identical in behavior but uses `null` throughout and spells
+> the parameter as `filter=True` (Python convention).
+
+### Division by Zero
+
+All divisions are guarded at `abs(denominator) < 1e-10`, returning null.
+
+The same `1e-10` threshold is used by `divide`, `inverse`, `zscore`, `ts_zscore`,
+`ts_cv`, `ts_scale`, `inst_tvr`, `ts_regression`, `scale`, `normalize`, `signal`,
+`group_zscore`, `group_scale`, `group_normalize`, `group_mean`, `vector_neut`, and
+`regression_neut`. Some of these document a non-null fallback (e.g. 0) in their own "Zero guard" note.
+
+The Factor constructor's implicit Inf-to-null conversion is a safety net,
+not a substitute for explicit zero guards.
+
+### Standard Deviation: ddof Convention
+
+| Context | ddof | Rationale |
+| --- | --- | --- |
+| All `std` / `variance` / `zscore` / `normalize` / `winsorize` | 0 (population) | Cross-sectional and rolling-window contexts operate on the full observed population, not a sample drawn from a larger one. |
+| `ts_corr` / `ts_covariance` / `ts_autocorr` | 1 (sample) | Maintains the identity `corr(x,y) = cov(x,y) / (std(x) * std(y))` when std uses ddof=0 internally, because Pearson correlation requires unbiased covariance estimation. |
+
+This split is consistent across the entire library. Every function that computes
+variance or standard deviation documents which ddof it uses.
+
+### Rank Conventions
+
+**Cross-sectional `rank(x)`:**
+- Output range: **(0, 1]** -- the smallest possible rank is `1/n` (`n` = count of non-null values), never 0.
+- Tie-breaking: `average` method (tied values receive the mean of their positions).
+- Null handling: nulls are excluded from ranking and remain null in output.
+- Formula: `average_rank / count_of_non_null`
+
+> **vs WQ:** WorldQuant `rank(x)` outputs range **[0, 1]** inclusive of zero
+> (`Rank(x) = (0.5, 0.25, 0.75, 1, 0)` in their docs). elvers intentionally
+> excludes zero because a rank of 0 is ambiguous -- it could mean "missing" or
+> "lowest". Range (0, 1] ensures every ranked value is unambiguously present.
+
+**Time-series `ts_rank(x, window)`:**
+- Output range: (0, 1] (same convention).
+- Formula: `average_rank / window` within each symbol's rolling window.
+- Tie-breaking: `average` method.
+
+**Group `group_rank(x, group)`:**
+- Output range: (0, 1] within each group.
+- Same average method and null exclusion.
+
+### Rolling Window Warmup
+
+All `ts_*` operators enforce `min_samples=window`. The first `window-1`
+observations per symbol return null. This prevents partial-window statistics
+from contaminating signals.
+
+### ts_product: Negative and Zero Handling
+
+`ts_product(x, window)` uses **sign-magnitude decomposition**:
+
+1. Count negative values in the window to determine product sign.
+2. Compute magnitude via `exp(sum(log(abs(x))))` (numerically stable).
+3. If any value in the window is zero, the product is 0.
+4. If any value is null, it propagates (min_samples applies).
+
+> **vs WQ:** WorldQuant does not document negative-value handling for
+> `ts_product`. A naive `exp(sum(log(x)))` implementation silently returns
+> NaN/null for negative inputs. elvers handles this correctly.
+
+---
+
+## Arithmetic Operators
+
+### `add(a, b, filter=False)`
+
+Element-wise addition of two Factors.
+
+- **filter=False** (default): `null + value = null`
+- **filter=True**: null treated as 0 before addition
+
+```python
+add(close, volume)               # null propagates
+add(close, volume, filter=True)  # null -> 0
+close + volume                   # operator overload, same as add(close, volume)
+```
+
+### `subtract(a, b, filter=False)`
+
+Element-wise subtraction. Same filter semantics as `add`.
+
+```python
+subtract(close, open_)
+close - open_
+```
+
+### `multiply(a, b, filter=False)`
+
+Element-wise multiplication.
+
+- **filter=True**: null treated as 1 (multiplicative identity)
+
+```python
+multiply(close, volume, filter=True)
+close * volume
+```
+
+### `divide(a, b)`
+
+Element-wise division with zero guard.
+
+- Returns null where `abs(b) < 1e-10`
+- No `filter` parameter (division by null is always null)
+
+```python
+divide(close, volume)
+close / volume
+```
+
+### `reverse(x)`
+
+Negation: `-x`.
+
+### `densify(x)`
+
+Remaps integer group labels to consecutive integers `0..(n-1)` where `n` is
+the number of unique non-null values at each timestamp. Uses dense rank (ties
+receive the same value).
+
+Useful for compacting sparse group fields before passing to `group_*` operators.
+
+### `bucket(x, n)`
+
+Assigns each value to one of `n` equal-frequency buckets based on cross-sectional
+rank. Returns integer bucket labels. Null inputs produce null output.
+
+---
+
+## Time-Series Operators
+
+Time-series operators compute per-symbol rolling statistics. Unless an entry below states otherwise, they share:
+- `min_samples=window` (first `window-1` values are null)
+- Null propagation within windows
+
+### `ts_delay(x, window)`
+
+Returns the value of `x` from `window` periods ago.
+
+- Warmup: first `window` values are null
+- Math: `x[t - window]`
+
+### `ts_delta(x, window)`
+
+Difference between current value and value `window` periods ago.
+
+- Math: `x[t] - x[t - window]`
+- Warmup: first `window` values are null
+
+### `ts_mean(x, window)`
+
+Simple moving average over `window` periods.
+
+- Math: `(1/n) * sum(x[t-w+1] ... x[t])` where `n` = count of non-null
+- Warmup: `window - 1` null values
+
+### `ts_sum(x, window)`
+
+Rolling sum over `window` periods.
+
+- Math: `sum(x[t-w+1] ... x[t])`
+- Warmup: `window - 1` null values
+
+### `ts_std_dev(x, window)`
+
+Rolling standard deviation.
+
+- Math: population std (ddof=0)
+- Warmup: `window - 1` null values
+
+### `ts_min(x, window)`
+
+Rolling minimum over `window` periods.
+
+### `ts_max(x, window)`
+
+Rolling maximum over `window` periods.
+
+### `ts_median(x, window)`
+
+Rolling median over `window` periods.
+
+### `ts_rank(x, window)`
+
+Rank of current value within its own rolling window.
+
+- Output range: (0, 1]
+- Tie-breaking: `average` method
+- Math: `average_rank / window`
+- Warmup: `window - 1` null values
+
+### `ts_skewness(x, window)`
+
+Rolling skewness (bias=True, population).
+
+- Warmup: `window - 1` null values
+
+### `ts_kurtosis(x, window)`
+
+Rolling excess kurtosis (Fisher definition: normal distribution = 0).
+
+- Warmup: `window - 1` null values
+
+### `ts_zscore(x, window)`
+
+Rolling z-score: `(x - rolling_mean) / rolling_std`.
+
+- ddof=0 for std
+- Zero guard: if `rolling_std < 1e-10`, returns 0 (constant series)
+- Warmup: `window - 1` null values
+
+### `ts_corr(x, y, window)`
+
+Rolling Pearson correlation between two Factors.
+
+- ddof=1 (sample covariance)
+- Range: [-1, 1]
+- Warmup: `window - 1` null values
+
+### `ts_covariance(x, y, window)`
+
+Rolling sample covariance between two Factors.
+
+- ddof=1
+- Warmup: `window - 1` null values
+
+### `ts_product(x, window)`
+
+Rolling product over `window` periods.
+
+- Handles negative values via sign-magnitude decomposition
+- Zero in window: returns 0
+- Math: `sign * exp(sum(log(abs(x))))` where sign = `(-1)^count_negatives`
+- Warmup: `window - 1` null values
+
+### `ts_step(n)`
+
+Returns an incrementing counter starting at 1. Typically used as the independent
+variable in `ts_regression`.
+
+- No warmup (available from first observation)
+
+### `ts_decay_linear(x, window)`
+
+Linearly weighted moving average. Recent values receive higher weights.
+
+- Weights: `[1, 2, ..., window]` (oldest=1, newest=window)
+- Math: `sum(x[i] * w[i]) / sum(w[i])`
+- Warmup: `window - 1` null values
+
+### `ts_decay_exp_window(x, window, factor)`
+
+Exponentially weighted moving average over a fixed window.
+
+- `factor` in (0, 1): smaller = faster decay (less weight on old values)
+- Math: `sum(x[t-i] * factor^i) / sum(factor^i)` for `i = 0..window-1`
+- Warmup: `window - 1` null values
+
+### `days_from_last_change(x)`
+
+Counts the number of periods since the value of `x` last changed.
+
+- Returns 0 on the period where a change occurs
+- No warmup
+
+### `ts_av_diff(x, window)`
+
+Difference between current value and rolling mean.
+
+- Math: `x - ts_mean(x, window)`
+- Warmup: `window - 1` null values
+
+### `ts_scale(x, window)`
+
+Min-max normalization within a rolling window.
+
+- Math: `(x - ts_min(x, w)) / (ts_max(x, w) - ts_min(x, w))`
+- Zero guard: if range < 1e-10, returns 0
+- Output range: [0, 1] when range is non-zero
+- Warmup: `window - 1` null values
+
+### `ts_percentile(x, window, q)`
+
+Rolling percentile of `x` at quantile `q`.
+
+- `q` in [0, 1]
+- Warmup: `window - 1` null values
+
+### `ts_quantile(x, window, driver="gaussian")`
+
+Computes `ts_rank` then applies the inverse CDF of the chosen distribution.
+
+- Supported drivers: `gaussian` (default), `uniform`, `cauchy`
+- Rank values clamped to [0.001, 0.999] before inverse CDF to avoid infinity
+- Warmup: `window - 1` null values
+
+### `ts_cv(x, window)`
+
+Rolling coefficient of variation: `std / abs(mean)`.
+
+- ddof=0 for std
+- Zero guard: if `abs(mean) < 1e-10`, returns null
+- Warmup: `window - 1` null values
+
+### `ts_autocorr(x, window, lag)`
+
+Rolling autocorrelation of `x` with itself lagged by `lag` periods.
+
+- ddof=1 (sample)
+- Warmup: `window - 1` null values
+
+### `ts_count_nans(x, window)`
+
+Counts the number of null values in the rolling window.
+
+- Warmup: the first `window - 1` outputs are null rather than a partial count
+
+### `ts_backfill(x, window, k=1)`
+
+Replaces null values with the k-th most recent non-null value within the
+lookback window.
+
+- `k=1` (default): most recent non-null
+- `k=2`: second most recent non-null
+- No warmup (operates on available data)
+
+### `kth_element(x, window, k, ignore="NaN")`
+
+Returns the k-th non-ignored value looking back over `window` periods.
+
+- `ignore="NaN"`: skip null values
+- `ignore="NaN 0"`: skip null and zero (uses `abs > 1e-10` for zero check)
+
+### `last_diff_value(x, window)`
+
+Returns the most recent value within the lookback window that differs from
+the current value. Returns null if no different value exists.
+
+### `inst_tvr(x, window)`
+
+Instrument turnover ratio: total absolute changes divided by total absolute values
+over the window.
+
+- Math: `sum(abs(delta)) / sum(abs(x))`
+- Zero guard: if denominator < 1e-10, returns 0
+- Warmup: `window - 1` null values
+
+### `ts_delta_limit(x, y, limit_volume=0.1)`
+
+Limits the per-period change in `x` to a fraction of `y`; the fraction is set by `limit_volume` (default 0.1).
+
+### `ts_regression(y, x, window, lag=0, rettype=0)`
+
+Rolling OLS regression of `y` on `x` over `window` periods.
+
+Returns different statistics based on `rettype`:
+
+| rettype | Returns |
+| --- | --- |
+| 0 | Residual (error term) |
+| 1 | Intercept (alpha) |
+| 2 | Slope (beta) |
+| 3 | y-estimate (fitted value) |
+| 4 | SSE (sum of squared errors) |
+| 5 | SST (sum of squared totals) |
+| 6 | R-squared |
+| 7 | MSE (mean squared error) |
+| 8 | Standard error of beta |
+| 9 | Standard error of alpha |
+
+- Zero guard: if `sum(x^2) < 1e-10` or `SST < 1e-10`, returns null where appropriate
+- `lag` parameter shifts `x` by the specified number of periods
+
+### `trade_when(cond, x, exit_value=-1.79e308)`
+
+Takes the value of `x` where `cond` is true; elsewhere it carries forward the
+most recently triggered value. `exit_value` is the sentinel that marks an exit signal.
+
+> **Known limitation:** The sentinel-based approach (`-1.79e308`) should be
+> replaced with a struct-based design in a future version.
+
+---
+
+## Cross-Sectional Operators
+
+All cross-sectional operators compute across all symbols at each timestamp.
+
+### `rank(x)`
+
+Cross-sectional rank normalized to (0, 1].
+
+- Tie-breaking: `average` method
+- Null: excluded from ranking, remain null
+- Formula: `average_rank / count_of_non_null`
+
+> **vs WQ:** `Rank(x) = (0.5, 0.25, 0.75, 1, 0)` in WQ for input `(4,3,6,10,2)`.
+> elvers returns `(0.6, 0.4, 0.8, 1.0, 0.2)` -- no zero, strictly positive.
+> This avoids ambiguity between "rank zero" and "missing data".
+
+### `zscore(x)`
+
+Cross-sectional z-score: `(x - mean) / std`.
+
+- ddof=0 (population)
+- Zero guard: if `std < 1e-10`, returns 0
+
+### `mean(x)`
+
+Cross-sectional mean. Broadcasts the mean value to all symbols at each timestamp.
+
+### `median(x)`
+
+Cross-sectional median. Broadcasts similarly.
+
+### `scale(x, target=1, longscale=1, shortscale=1)`
+
+Scales positions so that `sum(abs(x)) = target`.
+
+- Separate long/short scaling supported via `longscale` and `shortscale`
+- Zero guard: if `sum(abs(x)) < 1e-10`, returns 0
+
+### `normalize(x, use_std=False, limit=0.0)`
+
+Demeans the cross-section: `x - mean(x)`.
+
+- `use_std=True`: additionally divides by std (ddof=0)
+- `limit > 0`: clips result to `[-limit, limit]`
+- Zero guard: if std < 1e-10 (when use_std=True), returns 0
+
+### `quantile(x, driver="gaussian", sigma=1.0)`
+
+Ranks the cross-section then applies inverse CDF of the chosen distribution.
+
+- Supported drivers: `gaussian`, `uniform`, `cauchy`
+- Uses Acklam's rational approximation for Gaussian inverse CDF
+  (max error ~1.15e-9, pure Polars implementation)
+- Rank values shifted to avoid 0 and 1 before inverse CDF
+
+### `signal(x)`
+
+Normalizes `x` to have zero mean and unit absolute sum, subject to a minimum
+participation threshold (at least 2 valid values at a timestamp; see the zero guard below).
+
+- Zero guard: if `abs_sum < 1e-10` or `valid_count < 2`, returns 0
+- Ensures net-zero portfolio weights
+
+### `winsorize(x, std=4)`
+
+Clips values to `[mean - std*sigma, mean + std*sigma]` where sigma is the
+cross-sectional standard deviation (ddof=0).
+
+### `truncate(x, max_percent=0.01)`
+
+Caps each value so no single position exceeds `max_percent` of the total
+absolute sum.
+
+### `left_tail(x, maximum)`
+
+Nullifies values above `maximum`. Keeps only the left tail.
+
+### `right_tail(x, minimum)`
+
+Nullifies values below `minimum`. Keeps only the right tail.
+
+---
+
+## Neutralization and Group Operators
+
+### `vector_neut(x, y)`
+
+Projects `x` onto `y` and returns the orthogonal residual: `x - proj_y(x)`.
+
+- Math: `x - (dot(x,y) / dot(y,y)) * y`
+- Zero guard: if `dot(y,y) < 1e-10`, returns `x` unchanged
+
+### `regression_neut(y, x)`
+
+OLS cross-sectional neutralization. Returns the residual of regressing `y` on `x`.
+
+- Math: `y - (alpha + beta * x)` where beta = `cov(y,x) / var(x)`
+- Zero guard: if `var(x) < 1e-10`, beta = 0
+
+### `group_neutralize(x, group)`
+
+Subtracts the group mean from each value: `x - group_mean(x)`.
+
+### `group_rank(x, group)`
+
+Rank within each group, normalized to (0, 1].
+
+- Same `average` tie-breaking and null exclusion as `rank()`
+
+### `group_zscore(x, group)`
+
+Z-score within each group.
+
+- ddof=0
+- Zero guard: if group std < 1e-10, returns 0
+
+### `group_scale(x, group)`
+
+Min-max scaling within each group to [0, 1].
+
+- Zero guard: if group range < 1e-10, returns 0
+
+### `group_normalize(x, group, target=1)`
+
+Scales within each group so that `sum(abs(x)) = target`.
+
+- Zero guard: if group abs sum < 1e-10, returns 0
+
+### `group_mean(x, group, weight=None)`
+
+Group mean, optionally weighted.
+
+- If `weight` provided and `sum(weight) < 1e-10`, falls back to unweighted mean
+
+### `group_median(x, group)`
+
+Group median, broadcast back to each member.
+
+### `group_backfill(x, group, std=4)`
+
+Fills null values with the winsorized group mean (clipped at `std` standard
+deviations, ddof=0).
+
+---
+
+## Mathematical Operators
+
+### `log(x, base=None)`
+
+Natural logarithm (default) or logarithm with specified base.
+
+- Returns null for non-positive inputs (Polars native behavior)
+
+### `sqrt(x)`
+
+Square root. Returns null for negative inputs.
+
+> For sign-preserving roots, use `signed_power(x, 0.5)`.
+
+### `sign(x)`
+
+Returns -1, 0, or +1 based on the sign of `x`. Null returns null.
+
+### `power(x, exp)`
+
+Element-wise exponentiation: `x ^ exp`. Accepts scalar or Factor exponent.
+
+### `signed_power(x, exp)`
+
+Sign-preserving power: `sign(x) * abs(x) ^ exp`.
+
+Useful because `power(-3, 2) = 9` (loses sign) while
+`signed_power(-3, 2) = -9` (preserves sign).
+
+### `inverse(x)`
+
+Reciprocal: `1 / x`.
+
+- Zero guard: returns null where `abs(x) < 1e-10`
+
+### `s_log_1p(x)`
+
+Sign-preserving log: `sign(x) * log(1 + abs(x))`.
+
+Compresses large values while preserving sign and order. For small `x`,
+approximately equals `x`.
+
+### `maximum(x, y)`
+
+Element-wise maximum of two Factors (or Factor and scalar).
+
+### `minimum(x, y)`
+
+Element-wise minimum of two Factors (or Factor and scalar).
+
+### `where(cond, x, y)`
+
+Conditional selection: returns `x` where `cond` is truthy, `y` otherwise.
+Null in `cond` is treated as falsy.
+
+---
+
+## Experimental Operators (not exported)
+
+These live in `ops/_dev.py` and are **not production-grade**. They use Python
+callbacks via `rolling_map` and are not included in `__all__`.
+
+### `ts_arg_max(x, window)` [DEV]
+
+Returns how many periods ago the maximum occurred in the window.
+
+### `ts_arg_min(x, window)` [DEV]
+
+Returns how many periods ago the minimum occurred in the window.
+
+### `hump(x, hump=0.01)` [DEV]
+
+Limits turnover by capping per-period changes.
+
+> **Known bug:** `abs_sum` is computed over the entire series instead of a
+> rolling window.

From 18f33ebe8d006a71f31240cd2ffbafcd4b79dbdb Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Tue, 24 Mar 2026 16:21:06 +0800
Subject: [PATCH 06/10] docs: refactor docs to eliminate redundancy

- README.md: condense Numerical Conventions table, link to OPERATORS.md
- CLAUDE.md: add OPERATORS.md reference in Section 4.1, deduplicate
  commands reference (Section 11), add OPERATORS.md to architecture tree

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md | 38 ++++++++++++--------------------------
 README.md | 18 +++++++++---------
 2 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 600ad7e..36caa35 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -36,6 +36,8 @@ elvers/
 tests/
   conftest.py              Test fixtures (make_ts, make_factor)
   test_*.py                One test file per operator module
+OPERATORS.md               Operator specification: numerical conventions, per-operator behavior, WQ comparison
+CLAUDE.md                  Development standards (this file)
 ```
 
 ---
@@ -92,13 +94,15 @@ git push -u origin fix/bug-name
 
 ### 4.1 Numerical Correctness (Highest Priority)
 
+Full numerical conventions and per-operator specifications are in
+[OPERATORS.md](OPERATORS.md). The rules below are for writing new code:
+
 - All divisions MUST have explicit zero guards:
   `pl.when(denom.abs() < 1e-10).then(None).otherwise(num / denom)`
 - NEVER rely on the Factor constructor's implicit Inf-to-null conversion as normal logic flow
-- Statistical convention: ddof=0 (population) for std/variance, ddof=1 (sample) for corr/cov.
-  This is consistent across the entire library.
+- Statistical convention: ddof=0 (population) for std/variance, ddof=1 (sample) for corr/cov
 - Null semantics: null propagates naturally through Polars expressions. Boundary cases
-  (zero denominator, constant window, insufficient data) must be handled explicitly.
+  (zero denominator, constant window, insufficient data) must be handled explicitly
 
 ### 4.2 Operator Writing Rules
 
@@ -289,33 +293,15 @@ pre-commit install
 
 ---
 
-## 11. Commands Reference
+## 11. Quick Reference
 
 ```bash
-# === Setup ===
-pip install -e ".[dev]"          # Install with dev dependencies
-pre-commit install               # Install git hooks
-
-# === Daily Development ===
+pip install -e ".[dev]"          # Setup
+pre-commit install               # Git hooks
 pytest tests/ -v                 # Run all tests
-pytest tests/test_timeseries.py -v                # Single file
-pytest tests/test_timeseries.py::TestTsProduct -v # Single class
-ruff check elvers/               # Lint check
-ruff check elvers/ --fix         # Auto-fix lint issues
+pytest tests/test_timeseries.py::TestTsProduct -v  # Single test class
+ruff check elvers/ --fix         # Lint + auto-fix
 ruff format elvers/              # Format code
-
-# === Git ===
-git status                       # See what changed
-git diff                         # See actual changes
-git add <files>                  # Stage specific files (never git add -A)
-git commit -m "type(scope): msg" # Commit with convention
-git push origin <branch>         # Push to remote
-git log --oneline -10            # Recent history
-
-# === Release ===
-python -m build                  # Build package locally (for testing)
-git tag vX.Y.Z                   # Create version tag
-git push origin vX.Y.Z           # Push tag (triggers publish)
 ```
 
 ---
diff --git a/README.md b/README.md
index f8bb3ff..33c1999 100644
--- a/README.md
+++ b/README.md
@@ -19,16 +19,16 @@ Polars-native factor computation engine for quantitative research. All operators
 
 ## Numerical Conventions
 
-| Topic | Convention |
+| Convention | Summary |
 | --- | --- |
-| **Missing values** | NaN and Inf are converted to null on Factor creation. The library operates on a single missing-value semantic (null only). Nulls propagate through all operations; boundary cases (constant window, insufficient data, zero denominator) return null explicitly. |
-| **Null in arithmetic** | Default: `5.0 + null = null`. The `add`, `subtract`, `multiply` functions accept `filter=True` to treat null as the identity element (0 for +/-, 1 for *). |
-| **Division by zero** | All divisions guarded at `abs(denominator) < 1e-10`, returning null. Applies uniformly across `divide`, `inverse`, `zscore`, `ts_zscore`, `ts_cv`, `ts_regression`, and all neutralization operators. |
-| **Rank** | Range `(0, 1]`. Does not pass through zero. Ties use `average` method. Nulls excluded from ranking. |
-| **Standard deviation** | Population (ddof=0) for `std`, `variance`, `zscore`, `normalize`. |
-| **Correlation / Covariance** | Sample (ddof=1) for `ts_corr`, `ts_covariance`, `ts_autocorr`. Identity `corr(x,y) = cov(x,y) / (std(x) * std(y))` holds. |
-| **Rolling warmup** | All `ts_*` operators require `min_samples=window`. First `window-1` values per symbol are null. |
-| **ts_product** | Correctly handles negative values and zeros. |
+| Null semantics | NaN/Inf unified to null on construction. Single missing-value type throughout. |
+| Division by zero | All divisions guarded at `abs(denom) < 1e-10`, returning null. |
+| Rank range | (0, 1] -- does not pass through zero. Ties use `average` method. |
+| Std/Variance | ddof=0 (population). Corr/Cov use ddof=1 (sample). |
+| Rolling warmup | All `ts_*` operators: first `window-1` values are null. |
+
+Full conventions, WQ comparison notes, and per-operator specifications:
+**[OPERATORS.md](OPERATORS.md)**
 
 ## Installation
 

From 78abaf2fb56d78560722d12d07dec81045f2a76c Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Tue, 24 Mar 2026 16:26:42 +0800
Subject: [PATCH 07/10] docs: remove external platform references,
 self-contained spec

Replace all third-party comparison notes with elvers' own design rationale.
Rank convention now includes concrete example output for 5 instruments.
ts_product documents why sign-magnitude is necessary without referencing
other implementations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md    |  2 +-
 OPERATORS.md | 26 ++++++++++----------------
 README.md    |  2 +-
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 36caa35..cca7867 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -36,7 +36,7 @@ elvers/
 tests/
   conftest.py              Test fixtures (make_ts, make_factor)
   test_*.py                One test file per operator module
-OPERATORS.md               Operator specification: numerical conventions, per-operator behavior, WQ comparison
+OPERATORS.md               Operator specification: numerical conventions, per-operator behavior, design rationale
 CLAUDE.md                  Development standards (this file)
 ```
 
diff --git a/OPERATORS.md b/OPERATORS.md
index a8f50f1..9f8fbb7 100644
--- a/OPERATORS.md
+++ b/OPERATORS.md
@@ -1,8 +1,7 @@
 # Elvers Operator Specification
 
 Complete reference for all operators. Each entry documents signature, mathematical
-definition, null/edge-case behavior, and design rationale where elvers diverges
-from common platforms.
+definition, null/edge-case behavior, and design rationale.
 
 ---
 
@@ -40,10 +39,6 @@ operation:
 | add / subtract | 0 | null treated as 0 |
 | multiply | 1 | null treated as 1 |
 
-> **vs WQ:** WorldQuant uses `NaN` terminology and `filter=false` default.
-> elvers is identical in behavior but uses `null` throughout and spells
-> the parameter as `filter=True` (Python convention).
-
 ### Division by Zero
 
 All divisions are guarded at `abs(denominator) < 1e-10`, returning null.
@@ -74,10 +69,10 @@ variance or standard deviation documents which ddof it uses.
 - Null handling: nulls are excluded from ranking and remain null in output.
 - Formula: `average_rank / count_of_non_null`
 
-> **vs WQ:** WorldQuant `rank(x)` outputs range **[0, 1]** inclusive of zero
-> (`Rank(x) = (0.5, 0.25, 0.75, 1, 0)` in their docs). elvers intentionally
-> excludes zero because a rank of 0 is ambiguous -- it could mean "missing" or
-> "lowest". Range (0, 1] ensures every ranked value is unambiguously present.
+Design rationale: a rank of 0 is ambiguous -- it could mean "missing" or
+"lowest". Range (0, 1] ensures every ranked value is unambiguously present
+and distinguishable from null. For 5 instruments, ranks are
+`(0.2, 0.4, 0.6, 0.8, 1.0)`, not `(0, 0.25, 0.5, 0.75, 1.0)`.
 
 **Time-series `ts_rank(x, window)`:**
 - Output range: (0, 1] (same convention).
@@ -103,9 +98,9 @@ from contaminating signals.
 3. If any value in the window is zero, the product is 0.
 4. If any value is null, it propagates (min_samples applies).
 
-> **vs WQ:** WorldQuant does not document negative-value handling for
-> `ts_product`. A naive `exp(sum(log(x)))` implementation silently returns
-> NaN/null for negative inputs. elvers handles this correctly.
+A naive `exp(sum(log(x)))` implementation silently fails for negative inputs
+because `log(x)` is undefined for `x < 0`. The sign-magnitude approach avoids
+this by operating on absolute values and tracking the sign separately.
 
 ---
 
@@ -441,9 +436,8 @@ Cross-sectional rank normalized to (0, 1].
 - Null: excluded from ranking, remain null
 - Formula: `average_rank / count_of_non_null`
 
-> **vs WQ:** `Rank(x) = (0.5, 0.25, 0.75, 1, 0)` in WQ for input `(4,3,6,10,2)`.
-> elvers returns `(0.6, 0.4, 0.8, 1.0, 0.2)` -- no zero, strictly positive.
-> This avoids ambiguity between "rank zero" and "missing data".
+For input `(4, 3, 6, 10, 2)` across 5 instruments, elvers returns
+`(0.6, 0.4, 0.8, 1.0, 0.2)` -- strictly positive, no zero.
 
 ### `zscore(x)`
 
diff --git a/README.md b/README.md
index 33c1999..097b86e 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ Polars-native factor computation engine for quantitative research. All operators
 | Std/Variance | ddof=0 (population). Corr/Cov use ddof=1 (sample). |
 | Rolling warmup | All `ts_*` operators: first `window-1` values are null. |
 
-Full conventions, WQ comparison notes, and per-operator specifications:
+Full conventions and per-operator specifications:
 **[OPERATORS.md](OPERATORS.md)**
 
 ## Installation

From 27a40d1fa69b37462c7eeb9116753667a5034ca8 Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Tue, 24 Mar 2026 16:39:36 +0800
Subject: [PATCH 08/10] fix(ops): unify ddof=0 for ts_covariance, fix corr
 identity

ts_covariance changed from ddof=1 (sample) to ddof=0 (population)
to match the library-wide convention. This fixes the broken identity:

  ts_covariance(x,y,w) / (ts_std_dev(x,w) * ts_std_dev(y,w)) == ts_corr(x,y,w)

Previously the mixed ddof (cov=1, std=0) caused the left side to
exceed the true correlation by a factor of n/(n-1).

ts_corr and ts_autocorr retain ddof=1 in Polars rolling_corr because
Polars applies ddof only to the numerator (cov), producing incorrect
results with ddof=0. A correctly computed correlation is ddof-invariant
(ddof cancels in the ratio), so the ddof=1 values are the true correlation.

Cross-validated against numpy to machine precision (diff < 1e-15).

[NUMERICAL] ts_covariance output changes: values are now multiplied
by (n-1)/n relative to previous output. For window=20, this is a 5%
reduction. ts_corr and ts_autocorr output is unchanged.
---
 CLAUDE.md                |  4 +++-
 OPERATORS.md             | 27 +++++++++++++++++++--------
 elvers/ops/timeseries.py |  4 ++--
 tests/test_timeseries.py |  4 ++--
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index cca7867..7a3701e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -100,7 +100,9 @@ Full numerical conventions and per-operator specifications are in
 - All divisions MUST have explicit zero guards:
   `pl.when(denom.abs() < 1e-10).then(None).otherwise(num / denom)`
 - NEVER rely on the Factor constructor's implicit Inf-to-null conversion as normal logic flow
-- Statistical convention: ddof=0 (population) for std/variance, ddof=1 (sample) for corr/cov
+- Statistical convention: ddof=0 (population) for all std/variance/covariance.
+  ts_corr/ts_autocorr use ddof=1 in Polars rolling_corr due to a Polars constraint,
+  but correlation output is ddof-invariant. See OPERATORS.md for details.
 - Null semantics: null propagates naturally through Polars expressions. Boundary cases
   (zero denominator, constant window, insufficient data) must be handled explicitly
 
diff --git a/OPERATORS.md b/OPERATORS.md
index 9f8fbb7..bfa409a 100644
--- a/OPERATORS.md
+++ b/OPERATORS.md
@@ -53,13 +53,24 @@ not a substitute for explicit zero guards.
 
 ### Standard Deviation: ddof Convention
 
-| Context | ddof | Rationale |
-| --- | --- | --- |
-| All `std` / `variance` / `zscore` / `normalize` / `winsorize` | 0 (population) | Cross-sectional and rolling-window contexts operate on the full observed population, not a sample drawn from a larger one. |
-| `ts_corr` / `ts_covariance` / `ts_autocorr` | 1 (sample) | Maintains the identity `corr(x,y) = cov(x,y) / (std(x) * std(y))` when std uses ddof=0 internally, because Pearson correlation requires unbiased covariance estimation. |
+All variance, standard deviation, and covariance computations use **ddof=0
+(population)** throughout the library. Rolling windows and cross-sections
+operate on the full observed data, not a sample drawn from a larger population.
+
+This applies to: `ts_std_dev`, `ts_covariance`, `ts_zscore`, `ts_cv`,
+`zscore`, `normalize`, `winsorize`, `group_zscore`, `group_backfill`.
 
-This split is consistent across the entire library. Every function that computes
-variance or standard deviation documents which ddof it uses.
+`ts_corr` and `ts_autocorr` delegate to Polars `rolling_corr`, which
+requires `ddof=1` internally due to an implementation constraint in Polars
+(ddof=0 produces incorrect correlation values). Because ddof cancels in
+the correlation ratio `cov / (std_x * std_y)`, the output is identical
+regardless of ddof. The identity holds:
+
+```
+ts_covariance(x, y, w) / (ts_std_dev(x, w) * ts_std_dev(y, w)) == ts_corr(x, y, w)
+```
+
+This is verified against numpy to machine precision (`diff < 1e-15`).
 
 ### Rank Conventions
 
@@ -262,9 +273,9 @@ Rolling Pearson correlation between two Factors.
 
 ### `ts_covariance(x, y, window)`
 
-Rolling sample covariance between two Factors.
+Rolling population covariance between two Factors.
 
-- ddof=1
+- ddof=0
 - Warmup: `window - 1` null values
 
 ### `ts_product(x, window)`
diff --git a/elvers/ops/timeseries.py b/elvers/ops/timeseries.py
index 5655c8d..9822ae9 100644
--- a/elvers/ops/timeseries.py
+++ b/elvers/ops/timeseries.py
@@ -98,13 +98,13 @@ def ts_corr(a: Factor, b: Factor, window: int) -> Factor:
 
 
 def ts_covariance(a: Factor, b: Factor, window: int) -> Factor:
-    """Rolling sample covariance between two factors over N periods (ddof=1)."""
+    """Rolling population covariance between two factors over N periods (ddof=0)."""
     merged = a.df.rename({"factor": "_a"}).join(
         b.df.select(["timestamp", "symbol", pl.col("factor").alias("_b")]),
         on=["timestamp", "symbol"], how="inner"
     ).sort(["symbol", "timestamp"])
     result = merged.with_columns(
-        pl.rolling_cov(pl.col("_a"), pl.col("_b"), window_size=window, min_samples=window, ddof=1)
+        pl.rolling_cov(pl.col("_a"), pl.col("_b"), window_size=window, min_samples=window, ddof=0)
         .over("symbol").alias("factor")
     ).select(["timestamp", "symbol", "factor"])
     return Factor(result, f"ts_covariance({a.name},{b.name},{window})")
diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py
index d9bdcd1..045550d 100644
--- a/tests/test_timeseries.py
+++ b/tests/test_timeseries.py
@@ -106,8 +106,8 @@ class TestTsCovariance:
     def test_population_cov(self):
         a = make_ts([2.0, 4.0, 6.0, 8.0, 10.0])
         b = make_ts([1.0, 3.0, 5.0, 7.0, 9.0])
-        # ddof=1 (sample covariance): sum((xi-mx)(yi-my))/(n-1) = 40/4 = 10.0
-        assert _last(ts_covariance(a, b, 5))[0] == pytest.approx(10.0, rel=1e-6)
+        # ddof=0 (population covariance): sum((xi-mx)(yi-my))/n = 40/5 = 8.0
+        assert _last(ts_covariance(a, b, 5))[0] == pytest.approx(8.0, rel=1e-6)
 
 
 class TestTsProduct:

From c650efcf285f9ab34320b86707d95e4a04b81f48 Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Tue, 24 Mar 2026 17:14:59 +0800
Subject: [PATCH 09/10] docs: separate reference from rationale in
 documentation

OPERATORS.md rewritten as pure operator reference manual for QR/users:
concise signatures, behavior, parameters, edge cases. No design rationale.

Design decisions (ddof choice, rank range, NaN unification, zero guard
threshold, Polars workarounds) moved to CLAUDE.md Section 4.1 for
developers/LLMs.

README.md: fix outdated ddof description after ts_covariance change.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md    |  27 ++-
 OPERATORS.md | 499 ++++++++++++---------------------------------------
 README.md    |   2 +-
 3 files changed, 134 insertions(+), 394 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 7a3701e..acca1e2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -94,18 +94,35 @@ git push -u origin fix/bug-name
 
 ### 4.1 Numerical Correctness (Highest Priority)
 
-Full numerical conventions and per-operator specifications are in
-[OPERATORS.md](OPERATORS.md). The rules below are for writing new code:
+Operator behavior reference: [OPERATORS.md](OPERATORS.md).
+The rules below are for writing new code:
 
 - All divisions MUST have explicit zero guards:
   `pl.when(denom.abs() < 1e-10).then(None).otherwise(num / denom)`
 - NEVER rely on the Factor constructor's implicit Inf-to-null conversion as normal logic flow
-- Statistical convention: ddof=0 (population) for all std/variance/covariance.
-  ts_corr/ts_autocorr use ddof=1 in Polars rolling_corr due to a Polars constraint,
-  but correlation output is ddof-invariant. See OPERATORS.md for details.
 - Null semantics: null propagates naturally through Polars expressions. Boundary cases
   (zero denominator, constant window, insufficient data) must be handled explicitly
 
+#### Design Decisions (rationale for current conventions)
+
+- **NaN/Inf unified to null**: eliminates the NaN-infection problem (`NaN + 1 = NaN`)
+  that silently corrupts downstream computations. The Factor constructor converts on
+  creation so the entire library operates on a single missing-value type.
+- **ddof=0 everywhere**: rolling windows and cross-sections operate on the full observed
+  population, not a sample from a larger one. ddof=0 is semantically correct and avoids
+  n=1 division-by-zero (ddof=1 divides by n-1=0).
+- **ts_corr/ts_autocorr use ddof=1 internally**: Polars `rolling_corr(ddof=0)` has a bug
+  where ddof only applies to the covariance numerator, not the variance denominator,
+  producing values outside [-1, 1]. Reported: https://github.com/pola-rs/polars/issues/16161.
+  Correlation is ddof-invariant (cancels in ratio), so ddof=1 output is correct.
+- **rank range (0, 1] not [0, 1]**: a rank of 0 is ambiguous (could mean "missing" or
+  "lowest"). Strictly positive range ensures every ranked value is distinguishable from null.
+- **Zero guard threshold 1e-10**: conservative enough to catch near-zero denominators,
+  small enough not to interfere with legitimate small values in financial data.
+- **ts_product sign-magnitude decomposition**: naive `exp(sum(log(x)))` fails for negative
+  inputs because `log(x)` is undefined for x < 0. Separating sign and magnitude handles
+  this correctly.
+
 ### 4.2 Operator Writing Rules
 
 - Time-series operators: always use `min_samples=window` for consistency
diff --git a/OPERATORS.md b/OPERATORS.md
index bfa409a..3e5c7ff 100644
--- a/OPERATORS.md
+++ b/OPERATORS.md
@@ -1,166 +1,46 @@
-# Elvers Operator Specification
+# Elvers Operator Reference
 
-Complete reference for all operators. Each entry documents signature, mathematical
-definition, null/edge-case behavior, and design rationale.
+79 operators. All accept and return `Factor`.
 
 ---
 
-## Global Numerical Conventions
+## Conventions
 
-These conventions apply uniformly across every operator in the library.
-
-### Missing-Value Semantics
-
-elvers operates on a **single missing-value type: Polars null**.
-
-On Factor creation, IEEE 754 `NaN` and `Inf` are converted to `null`.
-This eliminates the NaN-infection problem (`NaN + 1 = NaN`) that silently
-corrupts downstream computations in libraries that distinguish NaN from null.
-
-Once inside elvers, all missing data is null. Operators propagate null naturally
-through Polars expressions; boundary cases (constant window, insufficient data,
-zero denominator) return null explicitly rather than producing NaN or Inf.
-
-### Null Arithmetic
-
-By default, any operation involving null produces null:
-
-```
-5.0 + null = null
-5.0 * null = null
-```
-
-The `add`, `subtract`, and `multiply` operators accept a `filter` parameter.
-When `filter=True`, null is replaced with the identity element before the
-operation:
-
-| Operator | Identity | filter=True behavior |
-| --- | --- | --- |
-| add / subtract | 0 | null treated as 0 |
-| multiply | 1 | null treated as 1 |
-
-### Division by Zero
-
-All divisions are guarded at `abs(denominator) < 1e-10`, returning null.
-
-This threshold is applied consistently across: `divide`, `inverse`, `zscore`,
-`ts_zscore`, `ts_cv`, `ts_scale`, `inst_tvr`, `ts_regression`, `scale`,
-`normalize`, `signal`, `group_zscore`, `group_scale`, `group_normalize`,
-`group_mean`, `vector_neut`, and `regression_neut`.
-
-The Factor constructor's implicit Inf-to-null conversion is a safety net,
-not a substitute for explicit zero guards.
-
-### Standard Deviation: ddof Convention
-
-All variance, standard deviation, and covariance computations use **ddof=0
-(population)** throughout the library. Rolling windows and cross-sections
-operate on the full observed data, not a sample drawn from a larger population.
-
-This applies to: `ts_std_dev`, `ts_covariance`, `ts_zscore`, `ts_cv`,
-`zscore`, `normalize`, `winsorize`, `group_zscore`, `group_backfill`.
-
-`ts_corr` and `ts_autocorr` delegate to Polars `rolling_corr`, which
-requires `ddof=1` internally due to an implementation constraint in Polars
-(ddof=0 produces incorrect correlation values). Because ddof cancels in
-the correlation ratio `cov / (std_x * std_y)`, the output is identical
-regardless of ddof. The identity holds:
-
-```
-ts_covariance(x, y, w) / (ts_std_dev(x, w) * ts_std_dev(y, w)) == ts_corr(x, y, w)
-```
-
-This is verified against numpy to machine precision (`diff < 1e-15`).
-
-### Rank Conventions
-
-**Cross-sectional `rank(x)`:**
-- Output range: **(0, 1]** -- the minimum rank is `1/n`, not 0.
-- Tie-breaking: `average` method (tied values receive the mean of their positions).
-- Null handling: nulls are excluded from ranking and remain null in output.
-- Formula: `average_rank / count_of_non_null`
-
-Design rationale: a rank of 0 is ambiguous -- it could mean "missing" or
-"lowest". Range (0, 1] ensures every ranked value is unambiguously present
-and distinguishable from null. For 5 instruments, ranks are
-`(0.2, 0.4, 0.6, 0.8, 1.0)`, not `(0, 0.25, 0.5, 0.75, 1.0)`.
-
-**Time-series `ts_rank(x, window)`:**
-- Output range: (0, 1] (same convention).
-- Formula: `average_rank / window` within each symbol's rolling window.
-- Tie-breaking: `average` method.
-
-**Group `group_rank(x, group)`:**
-- Output range: (0, 1] within each group.
-- Same average method and null exclusion.
-
-### Rolling Window Warmup
-
-All `ts_*` operators enforce `min_samples=window`. The first `window-1`
-observations per symbol return null. This prevents partial-window statistics
-from contaminating signals.
-
-### ts_product: Negative and Zero Handling
-
-`ts_product(x, window)` uses **sign-magnitude decomposition**:
-
-1. Count negative values in the window to determine product sign.
-2. Compute magnitude via `exp(sum(log(abs(x))))` (numerically stable).
-3. If any value in the window is zero, the product is 0.
-4. If any value is null, it propagates (min_samples applies).
-
-A naive `exp(sum(log(x)))` implementation silently fails for negative inputs
-because `log(x)` is undefined for `x < 0`. The sign-magnitude approach avoids
-this by operating on absolute values and tracking the sign separately.
+| Convention | Rule |
+| --- | --- |
+| Missing values | NaN and Inf are converted to null on Factor creation. Single missing-value type (null) throughout. |
+| Null arithmetic | `5.0 + null = null`. Use `filter=True` on add/subtract/multiply to treat null as identity (0 for +/-, 1 for *). |
+| Division by zero | All divisions guarded at `abs(denom) < 1e-10`. The guarded result is null unless the operator entry states a different fallback (e.g. `zscore` returns 0). |
+| Std / Variance | ddof=0 (population) for all std, variance, covariance, zscore, normalize, winsorize. |
+| Correlation | ddof-invariant. `ts_covariance(x,y,w) / (ts_std_dev(x,w) * ts_std_dev(y,w)) == ts_corr(x,y,w)`. |
+| Rank | Range (0, 1]. Does not include zero. Ties: `average` method. Null excluded. |
+| Rolling warmup | `ts_*` operators use `min_samples=window`: the first `window-1` values per symbol are null, unless the operator entry says otherwise (e.g. "No warmup"). |
 
 ---
 
-## Arithmetic Operators
+## Arithmetic
 
 ### `add(a, b, filter=False)`
 
-Element-wise addition of two Factors.
-
-- **filter=False** (default): `null + value = null`
-- **filter=True**: null treated as 0 before addition
+Element-wise addition. `filter=True`: null treated as 0.
 
 ```python
 add(close, volume)               # null propagates
 add(close, volume, filter=True)  # null -> 0
-close + volume                   # operator overload, same as add(close, volume)
+close + volume                   # operator overload
 ```
 
 ### `subtract(a, b, filter=False)`
 
-Element-wise subtraction. Same filter semantics as `add`.
-
-```python
-subtract(close, open_)
-close - open_
-```
+Element-wise subtraction. `filter=True`: null treated as 0.
 
 ### `multiply(a, b, filter=False)`
 
-Element-wise multiplication.
-
-- **filter=True**: null treated as 1 (multiplicative identity)
-
-```python
-multiply(close, volume, filter=True)
-close * volume
-```
+Element-wise multiplication. `filter=True`: null treated as 1.
 
 ### `divide(a, b)`
 
-Element-wise division with zero guard.
-
-- Returns null where `abs(b) < 1e-10`
-- No `filter` parameter (division by null is always null)
-
-```python
-divide(close, volume)
-close / volume
-```
+Element-wise division. Returns null where `abs(b) < 1e-10`.
 
 ### `reverse(x)`
 
@@ -168,490 +48,333 @@ Negation: `-x`.
 
 ### `densify(x)`
 
-Remaps integer group labels to consecutive integers `0..(n-1)` where `n` is
-the number of unique non-null values at each timestamp. Uses dense rank (ties
-receive the same value).
-
-Useful for compacting sparse group fields before passing to `group_*` operators.
+Remaps group labels to consecutive integers `0..(n-1)` per timestamp. Dense rank (ties get same value).
 
 ### `bucket(x, n)`
 
-Assigns each value to one of `n` equal-frequency buckets based on cross-sectional
-rank. Returns integer bucket labels. Null inputs produce null output.
+Assigns values to `n` equal-frequency buckets by cross-sectional rank. Returns integer labels. Null in, null out.
 
 ---
 
-## Time-Series Operators
+## Time-Series
 
-All time-series operators compute per-symbol rolling statistics. They share:
-- `min_samples=window` (first `window-1` values are null)
-- Null propagation within windows
+Per-symbol rolling window operators. Unless an entry says otherwise, they use `min_samples=window`, so the first `window-1` values are null.
 
 ### `ts_delay(x, window)`
 
-Returns the value of `x` from `window` periods ago.
-
-- Warmup: first `window` values are null
-- Math: `x[t - window]`
+Value from `window` periods ago. `x[t - window]`.
 
 ### `ts_delta(x, window)`
 
-Difference between current value and value `window` periods ago.
-
-- Math: `x[t] - x[t - window]`
-- Warmup: first `window` values are null
+`x[t] - x[t - window]`.
 
 ### `ts_mean(x, window)`
 
-Simple moving average over `window` periods.
-
-- Math: `(1/n) * sum(x[t-w+1] ... x[t])` where `n` = count of non-null
-- Warmup: `window - 1` null values
+Simple moving average. `(1/n) * sum(x)` where `n` = non-null count in window.
 
 ### `ts_sum(x, window)`
 
-Rolling sum over `window` periods.
-
-- Math: `sum(x[t-w+1] ... x[t])`
-- Warmup: `window - 1` null values
+Rolling sum.
 
 ### `ts_std_dev(x, window)`
 
-Rolling standard deviation.
-
-- Math: population std (ddof=0)
-- Warmup: `window - 1` null values
+Rolling standard deviation. ddof=0.
 
 ### `ts_min(x, window)`
 
-Rolling minimum over `window` periods.
+Rolling minimum.
 
 ### `ts_max(x, window)`
 
-Rolling maximum over `window` periods.
+Rolling maximum.
 
 ### `ts_median(x, window)`
 
-Rolling median over `window` periods.
+Rolling median.
 
 ### `ts_rank(x, window)`
 
-Rank of current value within its own rolling window.
+Rank of current value within its rolling window.
 
-- Output range: (0, 1]
-- Tie-breaking: `average` method
-- Math: `average_rank / window`
-- Warmup: `window - 1` null values
+- Range: (0, 1]
+- Ties: `average`
+- Formula: `average_rank / window`
 
 ### `ts_skewness(x, window)`
 
-Rolling skewness (bias=True, population).
-
-- Warmup: `window - 1` null values
+Rolling skewness. Population (bias=True).
 
 ### `ts_kurtosis(x, window)`
 
-Rolling excess kurtosis (Fisher definition: normal distribution = 0).
-
-- Warmup: `window - 1` null values
+Rolling excess kurtosis. Fisher definition (normal = 0).
 
 ### `ts_zscore(x, window)`
 
-Rolling z-score: `(x - rolling_mean) / rolling_std`.
-
-- ddof=0 for std
-- Zero guard: if `rolling_std < 1e-10`, returns 0 (constant series)
-- Warmup: `window - 1` null values
+`(x - rolling_mean) / rolling_std`. ddof=0. Returns 0 if `std < 1e-10`.
 
 ### `ts_corr(x, y, window)`
 
-Rolling Pearson correlation between two Factors.
-
-- ddof=1 (sample covariance)
-- Range: [-1, 1]
-- Warmup: `window - 1` null values
+Rolling Pearson correlation. Range: [-1, 1].
 
 ### `ts_covariance(x, y, window)`
 
-Rolling population covariance between two Factors.
-
-- ddof=0
-- Warmup: `window - 1` null values
+Rolling population covariance. ddof=0.
 
 ### `ts_product(x, window)`
 
-Rolling product over `window` periods.
+Rolling product. Handles negatives via sign-magnitude decomposition. Zero in window returns 0.
 
-- Handles negative values via sign-magnitude decomposition
-- Zero in window: returns 0
-- Math: `sign * exp(sum(log(abs(x))))` where sign = `(-1)^count_negatives`
-- Warmup: `window - 1` null values
+- Math: `(-1)^count_neg * exp(sum(log(abs(x))))`
 
 ### `ts_step(n)`
 
-Returns an incrementing counter starting at 1. Typically used as the independent
-variable in `ts_regression`.
-
-- No warmup (available from first observation)
+Incrementing counter starting at 1. No warmup.
 
 ### `ts_decay_linear(x, window)`
 
-Linearly weighted moving average. Recent values receive higher weights.
+Linearly weighted moving average. Weights: `[1, 2, ..., window]` (oldest=1, newest=window).
 
-- Weights: `[1, 2, ..., window]` (oldest=1, newest=window)
 - Math: `sum(x[i] * w[i]) / sum(w[i])`
-- Warmup: `window - 1` null values
 
 ### `ts_decay_exp_window(x, window, factor)`
 
-Exponentially weighted moving average over a fixed window.
+Exponentially weighted moving average. `factor` in (0, 1): smaller = faster decay.
 
-- `factor` in (0, 1): smaller = faster decay (less weight on old values)
 - Math: `sum(x[t-i] * factor^i) / sum(factor^i)` for `i = 0..window-1`
-- Warmup: `window - 1` null values
 
 ### `days_from_last_change(x)`
 
-Counts the number of periods since the value of `x` last changed.
-
-- Returns 0 on the period where a change occurs
-- No warmup
+Periods since last value change. Returns 0 on change. No warmup.
 
 ### `ts_av_diff(x, window)`
 
-Difference between current value and rolling mean.
-
-- Math: `x - ts_mean(x, window)`
-- Warmup: `window - 1` null values
+`x - ts_mean(x, window)`.
 
 ### `ts_scale(x, window)`
 
-Min-max normalization within a rolling window.
-
-- Math: `(x - ts_min(x, w)) / (ts_max(x, w) - ts_min(x, w))`
-- Zero guard: if range < 1e-10, returns 0
-- Output range: [0, 1] when range is non-zero
-- Warmup: `window - 1` null values
+Min-max normalization: `(x - min) / (max - min)`. Range: [0, 1]. Returns 0 if range < 1e-10.
 
 ### `ts_percentile(x, window, q)`
 
-Rolling percentile of `x` at quantile `q`.
-
-- `q` in [0, 1]
-- Warmup: `window - 1` null values
+Rolling percentile at quantile `q` in [0, 1].
 
 ### `ts_quantile(x, window, driver="gaussian")`
 
-Computes `ts_rank` then applies the inverse CDF of the chosen distribution.
-
-- Supported drivers: `gaussian` (default), `uniform`, `cauchy`
-- Rank values clamped to [0.001, 0.999] before inverse CDF to avoid infinity
-- Warmup: `window - 1` null values
+`ts_rank` then inverse CDF. Drivers: `gaussian`, `uniform`, `cauchy`. Rank clamped to [0.001, 0.999].
 
 ### `ts_cv(x, window)`
 
-Rolling coefficient of variation: `std / abs(mean)`.
+Coefficient of variation: `std / abs(mean)`. ddof=0. Returns null if `abs(mean) < 1e-10`.
 
-- ddof=0 for std
-- Zero guard: if `abs(mean) < 1e-10`, returns null
-- Warmup: `window - 1` null values
+### `ts_autocorr(x, window, lag=1)`
 
-### `ts_autocorr(x, window, lag)`
-
-Rolling autocorrelation of `x` with itself lagged by `lag` periods.
-
-- ddof=1 (sample)
-- Warmup: `window - 1` null values
+Rolling autocorrelation with specified lag.
 
 ### `ts_count_nans(x, window)`
 
-Counts the number of null values in the rolling window.
-
-- Warmup: `window - 1` null values (returns null, not a count)
+Count of null values in rolling window.
 
 ### `ts_backfill(x, window, k=1)`
 
-Replaces null values with the k-th most recent non-null value within the
-lookback window.
-
-- `k=1` (default): most recent non-null
-- `k=2`: second most recent non-null
-- No warmup (operates on available data)
+Fill nulls with k-th most recent non-null value. `k=1`: most recent. No warmup.
 
 ### `kth_element(x, window, k, ignore="NaN")`
 
-Returns the k-th non-ignored value looking back over `window` periods.
-
-- `ignore="NaN"`: skip null values
-- `ignore="NaN 0"`: skip null and zero (uses `abs > 1e-10` for zero check)
+k-th non-ignored value looking back. `ignore="NaN"`: skip null. `ignore="NaN 0"`: skip null and zero.
 
 ### `last_diff_value(x, window)`
 
-Returns the most recent value within the lookback window that differs from
-the current value. Returns null if no different value exists.
+Most recent value in lookback that differs from current. Null if none.
 
 ### `inst_tvr(x, window)`
 
-Instrument turnover ratio: total absolute changes divided by total absolute values
-over the window.
-
-- Math: `sum(abs(delta)) / sum(abs(x))`
-- Zero guard: if denominator < 1e-10, returns 0
-- Warmup: `window - 1` null values
+Instrument turnover: `sum(abs(delta)) / sum(abs(x))`. Returns 0 if denom < 1e-10.
 
 ### `ts_delta_limit(x, y, limit_volume=0.1)`
 
-Limits the per-period change in `x` to a fraction of `y`.
+Clips per-period change in `x` to a fraction of `y`.
 
 ### `ts_regression(y, x, window, lag=0, rettype=0)`
 
-Rolling OLS regression of `y` on `x` over `window` periods.
-
-Returns different statistics based on `rettype`:
+Rolling OLS regression.
 
 | rettype | Returns |
 | --- | --- |
-| 0 | Residual (error term) |
-| 1 | Intercept (alpha) |
+| 0 | Residual |
+| 1 | Intercept |
 | 2 | Slope (beta) |
-| 3 | y-estimate (fitted value) |
-| 4 | SSE (sum of squared errors) |
-| 5 | SST (sum of squared totals) |
+| 3 | Fitted value |
+| 4 | SSE |
+| 5 | SST |
 | 6 | R-squared |
-| 7 | MSE (mean squared error) |
-| 8 | Standard error of beta |
-| 9 | Standard error of alpha |
+| 7 | MSE |
+| 8 | Std error of beta |
+| 9 | Std error of alpha |
 
-- Zero guard: if `sum(x^2) < 1e-10` or `SST < 1e-10`, returns null where appropriate
-- `lag` parameter shifts `x` by the specified number of periods
+Zero guard on `sum(x^2) < 1e-10` and `SST < 1e-10`.
 
 ### `trade_when(cond, x, exit_value=-1.79e308)`
 
-Holds the value of `x` when `cond` is true; otherwise forward-fills the
-last triggered value. Uses a sentinel value for exit signals.
-
-> **Known limitation:** The sentinel-based approach (`-1.79e308`) should be
-> replaced with a struct-based design in a future version.
+Hold `x` when `cond` is true, forward-fill otherwise. Sentinel-based exit.
 
 ---
 
-## Cross-Sectional Operators
+## Cross-Sectional
 
-All cross-sectional operators compute across all symbols at each timestamp.
+Across all symbols at each timestamp.
 
 ### `rank(x)`
 
-Cross-sectional rank normalized to (0, 1].
+Normalized rank in (0, 1]. Ties: `average`. Null excluded.
 
-- Tie-breaking: `average` method
-- Null: excluded from ranking, remain null
-- Formula: `average_rank / count_of_non_null`
-
-For input `(4, 3, 6, 10, 2)` across 5 instruments, elvers returns
-`(0.6, 0.4, 0.8, 1.0, 0.2)` -- strictly positive, no zero.
+Example: `(4, 3, 6, 10, 2)` -> `(0.6, 0.4, 0.8, 1.0, 0.2)`
 
 ### `zscore(x)`
 
-Cross-sectional z-score: `(x - mean) / std`.
-
-- ddof=0 (population)
-- Zero guard: if `std < 1e-10`, returns 0
+`(x - mean) / std`. ddof=0. Returns 0 if `std < 1e-10`.
 
 ### `mean(x)`
 
-Cross-sectional mean. Broadcasts the mean value to all symbols at each timestamp.
+Cross-sectional mean, broadcast to all symbols.
 
 ### `median(x)`
 
-Cross-sectional median. Broadcasts similarly.
+Cross-sectional median, broadcast to all symbols.
 
 ### `scale(x, target=1, longscale=1, shortscale=1)`
 
-Scales positions so that `sum(abs(x)) = target`.
-
-- Separate long/short scaling supported via `longscale` and `shortscale`
-- Zero guard: if `sum(abs(x)) < 1e-10`, returns 0
+Scale so `sum(abs(x)) = target`. Separate long/short via `longscale`/`shortscale`. Returns 0 if sum < 1e-10.
 
 ### `normalize(x, use_std=False, limit=0.0)`
 
-Demeans the cross-section: `x - mean(x)`.
-
-- `use_std=True`: additionally divides by std (ddof=0)
-- `limit > 0`: clips result to `[-limit, limit]`
-- Zero guard: if std < 1e-10 (when use_std=True), returns 0
+Demean: `x - mean(x)`. `use_std=True`: divide by std (ddof=0). `limit > 0`: clip to [-limit, limit].
 
 ### `quantile(x, driver="gaussian", sigma=1.0)`
 
-Ranks the cross-section then applies inverse CDF of the chosen distribution.
-
-- Supported drivers: `gaussian`, `uniform`, `cauchy`
-- Uses Acklam's rational approximation for Gaussian inverse CDF
-  (max error ~1.15e-9, pure Polars implementation)
-- Rank values shifted to avoid 0 and 1 before inverse CDF
+Rank then inverse CDF. Drivers: `gaussian`, `uniform`, `cauchy`. Acklam approximation (error < 1.15e-9).
 
 ### `signal(x)`
 
-Normalizes `x` to have zero mean and unit absolute sum, with minimum
-participation threshold.
-
-- Zero guard: if `abs_sum < 1e-10` or `valid_count < 2`, returns 0
-- Ensures net-zero portfolio weights
+Zero-mean, unit-absolute-sum normalization. Returns 0 if `abs_sum < 1e-10` or `count < 2`.
 
 ### `winsorize(x, std=4)`
 
-Clips values to `[mean - std*sigma, mean + std*sigma]` where sigma is the
-cross-sectional standard deviation (ddof=0).
+Clip to `[mean - std*sigma, mean + std*sigma]`. ddof=0.
 
 ### `truncate(x, max_percent=0.01)`
 
-Caps each value so no single position exceeds `max_percent` of the total
-absolute sum.
+Cap each value so no position exceeds `max_percent` of total absolute sum.
 
 ### `left_tail(x, maximum)`
 
-Nullifies values above `maximum`. Keeps only the left tail.
+Sets values above `maximum` to null (keeps the left tail).
 
 ### `right_tail(x, minimum)`
 
-Nullifies values below `minimum`. Keeps only the right tail.
+Sets values below `minimum` to null (keeps the right tail).
 
 ---
 
-## Neutralization and Group Operators
+## Neutralization and Group
 
 ### `vector_neut(x, y)`
 
-Projects `x` onto `y` and returns the orthogonal residual: `x - proj_y(x)`.
-
-- Math: `x - (dot(x,y) / dot(y,y)) * y`
-- Zero guard: if `dot(y,y) < 1e-10`, returns `x` unchanged
+Orthogonal residual: `x - proj_y(x)`. Returns `x` if `dot(y,y) < 1e-10`.
 
 ### `regression_neut(y, x)`
 
-OLS cross-sectional neutralization. Returns the residual of regressing `y` on `x`.
-
-- Math: `y - (alpha + beta * x)` where beta = `cov(y,x) / var(x)`
-- Zero guard: if `var(x) < 1e-10`, beta = 0
+OLS residual: `y - (alpha + beta * x)`. beta = 0 if `var(x) < 1e-10`.
 
 ### `group_neutralize(x, group)`
 
-Subtracts the group mean from each value: `x - group_mean(x)`.
+`x - group_mean(x)`.
 
 ### `group_rank(x, group)`
 
-Rank within each group, normalized to (0, 1].
-
-- Same `average` tie-breaking and null exclusion as `rank()`
+Rank within group. (0, 1], `average`, null excluded.
 
 ### `group_zscore(x, group)`
 
-Z-score within each group.
-
-- ddof=0
-- Zero guard: if group std < 1e-10, returns 0
+Z-score within group. ddof=0. Returns 0 if group std < 1e-10.
 
 ### `group_scale(x, group)`
 
-Min-max scaling within each group to [0, 1].
-
-- Zero guard: if group range < 1e-10, returns 0
+Min-max within group to [0, 1]. Returns 0 if range < 1e-10.
 
 ### `group_normalize(x, group, target=1)`
 
-Scales within each group so that `sum(abs(x)) = target`.
-
-- Zero guard: if group abs sum < 1e-10, returns 0
+Scale within group so `sum(abs(x)) = target`. Returns 0 if sum < 1e-10.
 
 ### `group_mean(x, group, weight=None)`
 
-Group mean, optionally weighted.
-
-- If `weight` provided and `sum(weight) < 1e-10`, falls back to unweighted mean
+Group mean. With weight: weighted mean, falls back to unweighted if `sum(weight) < 1e-10`.
 
 ### `group_median(x, group)`
 
-Group median, broadcast back to each member.
+Group median, broadcast to members.
 
 ### `group_backfill(x, group, std=4)`
 
-Fills null values with the winsorized group mean (clipped at `std` standard
-deviations, ddof=0).
+Fill nulls with winsorized group mean (ddof=0, clipped at `std` sigmas).
 
 ---
 
-## Mathematical Operators
+## Math
 
 ### `log(x, base=None)`
 
-Natural logarithm (default) or logarithm with specified base.
-
-- Returns null for non-positive inputs (Polars native behavior)
+Natural log (default) or specified base. Null for non-positive inputs.
 
 ### `sqrt(x)`
 
-Square root. Returns null for negative inputs.
-
-> For sign-preserving roots, use `signed_power(x, 0.5)`.
+Square root. Null for negative inputs. For sign-preserving: `signed_power(x, 0.5)`.
 
 ### `sign(x)`
 
-Returns -1, 0, or +1 based on the sign of `x`. Null returns null.
+-1, 0, or +1. Null returns null.
 
 ### `power(x, exp)`
 
-Element-wise exponentiation: `x ^ exp`. Accepts scalar or Factor exponent.
+`x ^ exp`. Scalar or Factor exponent.
 
 ### `signed_power(x, exp)`
 
-Sign-preserving power: `sign(x) * abs(x) ^ exp`.
-
-Useful because `power(-3, 2) = 9` (loses sign) while
-`signed_power(-3, 2) = -9` (preserves sign).
+`sign(x) * abs(x) ^ exp`. Preserves sign.
 
 ### `inverse(x)`
 
-Reciprocal: `1 / x`.
-
-- Zero guard: returns null where `abs(x) < 1e-10`
+`1 / x`. Returns null where `abs(x) < 1e-10`.
 
 ### `s_log_1p(x)`
 
-Sign-preserving log: `sign(x) * log(1 + abs(x))`.
-
-Compresses large values while preserving sign and order. For small `x`,
-approximately equals `x`.
+`sign(x) * log(1 + abs(x))`. Compresses large values, preserves sign and order.
 
 ### `maximum(x, y)`
 
-Element-wise maximum of two Factors (or Factor and scalar).
+Element-wise max. Scalar or Factor.
 
 ### `minimum(x, y)`
 
-Element-wise minimum of two Factors (or Factor and scalar).
+Element-wise min. Scalar or Factor.
 
 ### `where(cond, x, y)`
 
-Conditional selection: returns `x` where `cond` is truthy, `y` otherwise.
-Null in `cond` is treated as falsy.
+`x` where `cond` is truthy, `y` otherwise. Null in `cond` is falsy.
 
 ---
 
-## Experimental Operators (not exported)
+## Experimental (not exported)
 
-These live in `ops/_dev.py` and are **not production-grade**. They use Python
-callbacks via `rolling_map` and are not included in `__all__`.
+In `ops/_dev.py`. Use Python callbacks (`rolling_map`). Not in `__all__`.
 
 ### `ts_arg_max(x, window)` [DEV]
 
-Returns how many periods ago the maximum occurred in the window.
+Periods since max in window.
 
 ### `ts_arg_min(x, window)` [DEV]
 
-Returns how many periods ago the minimum occurred in the window.
+Periods since min in window.
 
 ### `hump(x, hump=0.01)` [DEV]
 
-Limits turnover by capping per-period changes.
-
-> **Known bug:** `abs_sum` is computed over the entire series instead of a
-> rolling window.
+Limit turnover by capping per-period changes.
diff --git a/README.md b/README.md
index 097b86e..7571eb3 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Polars-native factor computation engine for quantitative research. All operators
 | Null semantics | NaN/Inf unified to null on construction. Single missing-value type throughout. |
 | Division by zero | All divisions guarded at `abs(denom) < 1e-10`, returning null. |
 | Rank range | (0, 1] -- does not pass through zero. Ties use `average` method. |
-| Std/Variance | ddof=0 (population). Corr/Cov use ddof=1 (sample). |
+| Std/Variance/Covariance | ddof=0 (population). Correlation is ddof-invariant. |
 | Rolling warmup | All `ts_*` operators: first `window-1` values are null. |
 
 Full conventions and per-operator specifications:

From badd468e179b9d82b99cf861acf61eb0571c4498 Mon Sep 17 00:00:00 2001
From: quantbai <quantbai@gmail.com>
Date: Tue, 24 Mar 2026 17:22:10 +0800
Subject: [PATCH 10/10] release: v0.3.0

[NUMERICAL] ts_covariance unified to ddof=0, fixing cov/(std*std)==corr identity.
Documentation restructured: OPERATORS.md as reference, rationale in CLAUDE.md.
Fixed incorrect signatures (trade_when, scale, bucket) and README example.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md       | 15 ++++++++++++++-
 OPERATORS.md       | 14 +++++++-------
 README.md          |  8 ++++----
 elvers/__init__.py |  7 ++-----
 4 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 228b85b..6c97dbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,19 @@ Numerical changes are marked with [NUMERICAL].
 
 ## [Unreleased]
 
+## [0.3.0] - 2026-03-24
+
+### Fixed
+- [NUMERICAL] `ts_covariance`: unified to ddof=0 (population), consistent with all other
+  variance/std operators. Fixes the broken identity `cov/(std_x*std_y) == corr` which
+  previously had ~5-20% error due to mixed ddof. Cross-validated against numpy (diff < 1e-15).
+
+### Changed
+- OPERATORS.md rewritten as pure operator reference manual (signatures, behavior, edge cases)
+- Design rationale moved to CLAUDE.md Section 4.1 (developer-facing)
+- Fixed incorrect signatures in docs: `trade_when`, `scale`, `bucket`
+- Fixed README example code to use only columns present in sample data
+
 ## [0.2.0] - 2026-03-23
 
 ### Added
@@ -24,7 +37,7 @@ Numerical changes are marked with [NUMERICAL].
 
 ### Fixed
 - [NUMERICAL] `ts_product`: silently returned null for negative inputs; now correctly handles negative values via sign-magnitude decomposition
-- [NUMERICAL] `ts_covariance`: used ddof=0 (population) inconsistent with `ts_corr` (ddof=1); aligned to ddof=1 (sample)
+- [NUMERICAL] `ts_covariance`: added explicit ddof parameter (was using Polars default)
 - [NUMERICAL] `divide()`: no zero-denominator protection; now returns null where abs(divisor) < 1e-10
 - [NUMERICAL] `inverse()`: no zero protection; now returns null where abs(x) < 1e-10
 - `ts_regression` rettype=7 (MSE): implicit Inf-to-null on window=2; now has explicit guard
diff --git a/OPERATORS.md b/OPERATORS.md
index 3e5c7ff..42d7a85 100644
--- a/OPERATORS.md
+++ b/OPERATORS.md
@@ -1,6 +1,6 @@
 # Elvers Operator Reference
 
-79 operators. All accept and return `Factor`.
+72 operators. All accept and return `Factor`.
 
 ---
 
@@ -50,9 +50,9 @@ Negation: `-x`.
 
 Remaps group labels to consecutive integers `0..(n-1)` per timestamp. Dense rank (ties get same value).
 
-### `bucket(x, n)`
+### `bucket(x, buckets=None, range_params=None)`
 
-Assigns values to `n` equal-frequency buckets by cross-sectional rank. Returns integer labels. Null in, null out.
+Assigns values to bucket indices based on edge boundaries. Provide either `buckets` (list of edges) or `range_params` (start, end, step). Returns integer labels. Null in, null out.
 
 ---
 
@@ -213,9 +213,9 @@ Rolling OLS regression.
 
 Zero guard on `sum(x^2) < 1e-10` and `SST < 1e-10`.
 
-### `trade_when(cond, x, exit_value=-1.79e308)`
+### `trade_when(trigger, alpha, exit_cond)`
 
-Hold `x` when `cond` is true, forward-fill otherwise. Sentinel-based exit.
+Hold `alpha` when `trigger > 0`, null when `exit_cond > 0`, forward-fill otherwise. All three parameters are Factors.
 
 ---
 
@@ -241,9 +241,9 @@ Cross-sectional mean, broadcast to all symbols.
 
 Cross-sectional median, broadcast to all symbols.
 
-### `scale(x, target=1, longscale=1, shortscale=1)`
+### `scale(x, target=1.0, longscale=0.0, shortscale=0.0)`
 
-Scale so `sum(abs(x)) = target`. Separate long/short via `longscale`/`shortscale`. Returns 0 if sum < 1e-10.
+Scale so `sum(abs(x)) = target`. When `longscale`/`shortscale` are non-zero, scale long and short legs separately. Returns 0 if sum < 1e-10.
 
 ### `normalize(x, use_std=False, limit=0.0)`
 
diff --git a/README.md b/README.md
index 7571eb3..da5965f 100644
--- a/README.md
+++ b/README.md
@@ -39,15 +39,15 @@ pip install elvers
 ## Usage
 
 ```python
-from elvers import load, ts_rank, ts_regression, zscore, signal, group_neutralize
+from elvers import load, ts_rank, ts_regression, zscore, signal
 
-panel = load("ohlcv.parquet")           # or load() for built-in sample data
+panel = load()                          # built-in sample data (crypto 1d OHLCV)
 close, volume = panel["close"], panel["volume"]
 
 momentum    = ts_rank(close, 20)
 vol_adj     = zscore(momentum) / zscore(ts_rank(volume, 20))
 beta_resid  = ts_regression(close, volume, window=60, rettype=0)
-alpha       = signal(group_neutralize(vol_adj, panel["sector"]))
+alpha       = signal(vol_adj)
 ```
 
 Sub-daily data is supported via the `interval` parameter:
@@ -58,7 +58,7 @@ panel = load("hourly.parquet", interval="1h")
 
 ## Operators
 
-70+ operators. All accept and return `Factor`.
+72 operators. All accept and return `Factor`.
 
 **Time-Series** -- rolling window per symbol:
 
diff --git a/elvers/__init__.py b/elvers/__init__.py
index 3c41065..b2eac87 100644
--- a/elvers/__init__.py
+++ b/elvers/__init__.py
@@ -1,13 +1,10 @@
 """
-Elvers - Polars-native factor expression engine.
-
-High-performance, strictly-typed multi-factor alpha research
-built on Polars for lightning-fast factor computation.
+Elvers - Polars-native factor computation engine.
 
 Author: quantbai
 """
 
-__version__ = "0.2.0"
+__version__ = "0.3.0"
 __author__ = "quantbai"
 
 from .core import Factor