Skip to content

Commit 6457a49

Browse files
authored
Add test suite for backwards compatibility (#999)
* Add backwards compatibility tests. * Save data to disk, save predictions as npy.
1 parent a25bdcf commit 6457a49

5 files changed

Lines changed: 303 additions & 0 deletions

File tree

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
name: Backwards Compatibility
2+
3+
on:
4+
# We would like to trigger CI for any pull request action -
5+
# both from QuantCo's branches as well as forks.
6+
pull_request:
7+
# In addition to pull requests, we want to run CI for pushes
8+
# to the main branch and tags.
9+
push:
10+
branches:
11+
- "main"
12+
tags:
13+
- "*"
14+
workflow_dispatch:
15+
16+
jobs:
17+
backwards-compatibility:
18+
name: Backwards compatibility tests
19+
runs-on: ubuntu-latest
20+
timeout-minutes: 60
21+
steps:
22+
- name: Checkout branch
23+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
24+
25+
- name: Set up pixi
26+
uses: prefix-dev/setup-pixi@a0af7a228712d6121d37aba47adf55c1332c9c2e # v0.9.4
27+
with:
28+
environments: default
29+
cache: true
30+
31+
- name: Install current glum
32+
run: pixi run postinstall
33+
34+
- name: Run backwards compatibility tests
35+
run: pixi run test-backwards-compatibility

pixi.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ postinstall = "pip install --no-build-isolation --no-deps --disable-pip-version-
1010
store-benchmark-golden-master = { cmd = "python tests/glm/test_benchmark_golden_master.py", env = { PYTHONPATH = "." } }
1111
store-golden-master = { cmd = "python tests/glm/test_golden_master.py", env = { PYTHONPATH = "." } }
1212
test = { cmd = "pytest tests/glm --doctest-modules src/glum", env = { PYTHONPATH = "." } }
13+
test-backwards-compatibility = { cmd = "python tests/backwards_compatibility/run.py" }
1314

1415
[feature.docs.tasks]
1516
make-docs = "cd docs && make html"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
artifacts/
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Fit a GLM with the installed glum version and save artifacts.
2+
3+
Usage:
4+
python fit.py X.Y.Z # label artifacts with a release version
5+
python fit.py HEAD # label artifacts with HEAD (current repo version)
6+
7+
Artifacts are written to:
8+
tests/backwards_compatibility/artifacts/<version>/model.pkl
9+
tests/backwards_compatibility/artifacts/<version>/predictions.npy
10+
11+
NOTE: This script must work with glum >= 2.0.0. It deliberately avoids
12+
features added in 3.x: formula interface, Polars DataFrames, monotonic
13+
constraints, closed-form solver.
14+
"""
15+
16+
import argparse
17+
import pickle
18+
from pathlib import Path
19+
20+
import numpy as np
21+
22+
from glum import GeneralizedLinearRegressor
23+
24+
SCRIPT_DIR = Path(__file__).resolve().parent
25+
ARTIFACTS_DIR = SCRIPT_DIR / "artifacts"
26+
27+
28+
def main():
    """Fit a GLM on the shared dataset and persist the model and its predictions.

    Takes a single positional command-line argument, ``version``, which is
    used only as the name of the output directory below ``ARTIFACTS_DIR``.
    Reads ``X.npy`` and ``y.npy`` from ``ARTIFACTS_DIR`` and writes
    ``model.pkl`` and ``predictions.npy`` into ``ARTIFACTS_DIR/<version>/``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "version",
        help="Artifact label: a release version string (e.g. 2.0.3) or HEAD",
    )
    label = cli.parse_args().version

    import glum

    # The label and the installed version are independent; print both so a
    # mismatch is visible in the log.
    print(f"Installed glum version: {glum.__version__}")
    print(f"Artifact label: {label}")

    target_dir = ARTIFACTS_DIR / label
    target_dir.mkdir(parents=True, exist_ok=True)

    features = np.load(str(ARTIFACTS_DIR / "X.npy"))
    response = np.load(str(ARTIFACTS_DIR / "y.npy"))

    # Everything is passed by keyword: glum 3.0 made all params keyword-only,
    # and keywords work in 2.x too.
    estimator_settings = {
        "family": "normal",
        # Explicit because the default differs between 2.x (1) and 3.x (0).
        "alpha": 1.0,
        # Avoids the closed-form solver added in 3.2, which may produce
        # slightly different floating-point results than the iterative solver.
        "solver": "irls-cd",
    }
    estimator = GeneralizedLinearRegressor(**estimator_settings)
    estimator.fit(features, response)

    model_file = target_dir / "model.pkl"
    with model_file.open("wb") as handle:
        pickle.dump(estimator, handle)
    print(f"Saved model to {model_file}")

    predictions_file = target_dir / "predictions.npy"
    np.save(str(predictions_file), estimator.predict(features))
    print(f"Saved predictions to {predictions_file}")
69+
70+
71+
if __name__ == "__main__":
72+
main()
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""Backwards compatibility tests for glum.
2+
3+
Usage: python tests/backwards_compatibility/run.py
4+
(or via: pixi run test-backwards-compatibility)
5+
6+
1. Fits the current (HEAD) glum to produce reference predictions.
7+
2. Queries conda-forge via `pixi search` to discover the latest patch release
8+
for each minor version of glum.
9+
3. For each version, uses `pixi exec` to fit a model and save artifacts
10+
(model.pkl + predictions.npy) under artifacts/<version>/.
11+
4. Unpickles each saved model using the current glum and verifies that
12+
predictions match the HEAD reference.
13+
"""
14+
15+
import json
16+
import pickle
17+
import subprocess
18+
import sys
19+
from pathlib import Path
20+
21+
import numpy as np
22+
from packaging.version import Version
23+
from sklearn.datasets import make_regression
24+
25+
SCRIPT_DIR = Path(__file__).resolve().parent
26+
ARTIFACTS_DIR = SCRIPT_DIR / "artifacts"
27+
28+
SKIP_VERSIONS: set[str] = set()
29+
30+
31+
def write_dataset() -> None:
    """Generate the fixed regression dataset and store it under ``ARTIFACTS_DIR``.

    The data comes from ``make_regression`` with a fixed ``random_state`` and is
    written as ``X.npy`` and ``y.npy`` so that every fit.py invocation loads
    identical inputs from disk. ``ARTIFACTS_DIR`` is created if it is missing.
    """
    features, response = make_regression(
        n_samples=500,
        n_features=5,
        noise=1.0,
        random_state=42,
    )
    ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)
    for filename, array in (("X.npy", features), ("y.npy", response)):
        np.save(str(ARTIFACTS_DIR / filename), array)
37+
38+
39+
def discover_versions() -> list[str]:
    """Return the latest patch release for each minor version of glum on conda-forge.

    Runs ``pixi search glum --json`` and parses its standard output as a JSON
    object whose values are lists of package records, each carrying a
    ``"version"`` string. The keys appear to be platform names (TODO confirm
    against the pixi output format).

    Records from every key are merged, so the result depends neither on which
    key happens to come first nor on how many keys there are. An empty search
    result yields an empty list instead of raising ``StopIteration``.

    Returns the version strings exactly as reported, one per
    ``(major, minor)`` pair, sorted in ascending version order.

    Raises ``subprocess.CalledProcessError`` if ``pixi search`` exits with a
    non-zero status.
    """
    result = subprocess.run(
        ["pixi", "search", "glum", "--json"],
        check=True,
        capture_output=True,
        text=True,
    )
    data = json.loads(result.stdout)

    # Keep the raw strings (not normalised ``Version`` objects) so that the
    # returned values can be fed back verbatim into ``--spec=glum==<version>``.
    best: dict[tuple[int, int], str] = {}
    for entries in data.values():
        for entry in entries:
            raw = entry["version"]
            parsed = Version(raw)
            key = (parsed.major, parsed.minor)
            if key not in best or parsed > Version(best[key]):
                best[key] = raw
    return sorted(best.values(), key=Version)
56+
57+
58+
def fit_version(version: str) -> bool:
    """Run fit.py for ``version`` in a subprocess and report whether it succeeded.

    ``"HEAD"`` is run through ``pixi run``. Any other value is run through
    ``pixi exec`` with ``glum==<version>`` plus compatibility pins that depend
    on the version. The child's stdout and stderr are captured and only echoed
    when it exits with a non-zero status.

    Returns True if the subprocess exit code is 0, False otherwise.
    """
    fit_script = str(SCRIPT_DIR / "fit.py")

    if version == "HEAD":
        command = ["pixi", "run", "python", fit_script, "HEAD"]
    else:
        parsed = Version(version)
        pins = []
        # glum <=2.6.0 imports pkg_resources from setuptools, which was removed
        # in setuptools 82.
        if parsed <= Version("2.6.0"):
            pins.append("--spec=setuptools<82")
        if parsed <= Version("2.3.0"):
            # sklearn 1.3 added const qualifiers to _cython_blas function
            # pointers, breaking the Cython ABI of these older glum builds.
            pins.append("--spec=scikit-learn<1.3")
        elif parsed < Version("3.1.0"):
            # Applies to everything after 2.3.0 and before 3.1.0. The stated
            # reason is that sklearn 1.6 removed BaseEstimator._validate_data,
            # which glum 3.0.x relies on.
            pins.append("--spec=scikit-learn<1.6")
        command = [
            "pixi",
            "exec",
            f"--spec=glum=={version}",
            *pins,
            "python",
            fit_script,
            version,
        ]

    completed = subprocess.run(command, capture_output=True, text=True)
    succeeded = completed.returncode == 0
    if not succeeded:
        print(completed.stdout, end="")
        print(completed.stderr, end="", file=sys.stderr)
    return succeeded
85+
86+
87+
def compare_versions(versions: list[str]) -> bool:
    """Unpickle each version's model and verify its predictions match HEAD.

    For every entry of ``versions`` that has a directory under
    ``ARTIFACTS_DIR``, the pickled model is loaded with the currently
    installed glum and used to predict on the shared ``X.npy``. Those
    predictions are compared (``rtol=1e-5``) against

    * the ``predictions.npy`` stored next to the pickle by fit.py, to confirm
      the pickle round-trip is stable, and
    * the predictions stored for HEAD.

    Every problem is collected as a per-version failure so one bad artifact
    does not hide the others. This includes a missing or unreadable
    ``predictions.npy``; in that case the comparison against HEAD still runs.

    Returns True only if at least one version directory exists and every
    check passed. ``X.npy`` and the HEAD predictions are loaded without
    protection, so an exception propagates if either is missing.
    """
    version_dirs = [ARTIFACTS_DIR / v for v in versions if (ARTIFACTS_DIR / v).is_dir()]

    if not version_dirs:
        print("ERROR: No artifact directories found. Did fit step produce any output?")
        return False

    X = np.load(str(ARTIFACTS_DIR / "X.npy"))
    head_predictions = np.load(str(ARTIFACTS_DIR / "HEAD" / "predictions.npy"))

    import glum

    current_version = glum.__version__
    print(f"Current glum version: {current_version}")
    print(f"Testing {len(version_dirs)} version(s): {[d.name for d in version_dirs]}\n")

    failures: list[str] = []

    for version_dir in version_dirs:
        version = version_dir.name
        pickle_path = version_dir / "model.pkl"
        predictions_path = version_dir / "predictions.npy"

        try:
            with open(pickle_path, "rb") as f:
                # NOTE: pickle.load can execute arbitrary code. These files are
                # expected to be the ones fit.py wrote under ARTIFACTS_DIR;
                # never point this at pickles from an untrusted source.
                old_model = pickle.load(f)
        except Exception as e:
            failures.append(f"{version}: unpickling failed: {e}")
            continue

        try:
            old_predictions = old_model.predict(X)
        except Exception as e:
            failures.append(f"{version}: predict() failed after unpickling: {e}")
            continue

        # A missing or corrupt predictions file is a failure of this version
        # only; record it and carry on with the HEAD comparison.
        try:
            stored_predictions = np.load(str(predictions_path))
        except Exception as e:
            failures.append(f"{version}: loading stored predictions failed: {e}")
            stored_predictions = None

        if stored_predictions is not None and _record_mismatch(
            failures,
            old_predictions,
            stored_predictions,
            f"[{version}] Unpickled predictions do not match stored array",
        ):
            print(f"[{version}] PASS: unpickled predictions match stored predictions")

        if _record_mismatch(
            failures,
            old_predictions,
            head_predictions,
            f"[{version}] Predictions from old model do not match HEAD",
        ):
            print(f"[{version}] PASS: old model predictions match HEAD")

    print()
    if failures:
        print("FAILURES:")
        for msg in failures:
            print(f"  - {msg}")
        return False

    print(f"All {len(version_dirs)} version(s) passed.")
    return True


def _record_mismatch(failures: list[str], actual, expected, err_msg: str) -> bool:
    """Compare two arrays at ``rtol=1e-5``; on mismatch append the message to ``failures``.

    Returns True when the arrays agree and False when a failure was recorded.
    """
    try:
        np.testing.assert_allclose(actual, expected, rtol=1e-5, err_msg=err_msg)
    except AssertionError as e:
        failures.append(str(e))
        return False
    return True
161+
162+
163+
def main() -> None:
    """Fit HEAD and all released minor versions, then compare predictions.

    Steps, in order: write the shared dataset, fit HEAD (exit with status 1 if
    that fails), discover released versions, fit each one that is not listed
    in ``SKIP_VERSIONS``, and compare every successfully fitted version
    against HEAD. A released version whose fit fails is reported with a
    warning and left out of the comparison. The process exits with status 1
    if the comparison reports any failure.
    """
    write_dataset()

    print("=== Fitting HEAD ===")
    head_ok = fit_version("HEAD")
    if not head_ok:
        print("ERROR: Failed to fit HEAD model.")
        sys.exit(1)

    print("\n=== Discovering glum versions from conda-forge ===")
    discovered = discover_versions()
    print(f"Found {len(discovered)} minor release(s): {' '.join(discovered)}")

    print("\n=== Generating compatibility artifacts ===")
    comparable = []
    for release in discovered:
        if release in SKIP_VERSIONS:
            print(f"--- Skipping glum=={release} (known incompatibility) ---")
            continue
        print(f"--- Fitting glum=={release} ---")
        if not fit_version(release):
            # Best effort: a release that cannot be fitted is dropped from the
            # comparison rather than aborting the run.
            print(f"WARNING: glum=={release} failed, skipping.")
            continue
        comparable.append(release)

    print("\n=== Comparing against HEAD ===")
    all_match = compare_versions(comparable)
    if not all_match:
        sys.exit(1)
191+
192+
193+
if __name__ == "__main__":
194+
main()

0 commit comments

Comments
 (0)