Skip to content

Commit 697396c

Browse files
Secbone and claude committed
feat: add card2pmml() for PMML export of scorecards
Add ScoreCard.card2pmml() method to export scorecard rules to PMML format, enabling deployment in Java/PMML ecosystems. This addresses the feature request from PR #124 with a cleaner implementation.

Changes:
- Add _build_numeric_expression() helper for ExpressionTransformer
- Add card2pmml() method using sklearn2pmml pipeline
- Add requirements-pmml.txt with sklearn2pmml/sklearn-pandas deps
- Add [pmml] optional dependency group in pyproject.toml
- Add comprehensive unit tests and integration tests (with skip guards)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8738990 commit 697396c

4 files changed

Lines changed: 220 additions & 2 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dynamic = [
2424
[tool.setuptools.dynamic]
2525
readme = {file = ["README.md"], content-type = "text/markdown"}
2626
dependencies = {file = ["requirements.txt"]}
27-
optional-dependencies = {nn = {file = ["requirements-nn.txt"]}, tools = {file = ["requirements-tools.txt"]}, all = {file = ["requirements-nn.txt", "requirements-tools.txt"]} }
27+
optional-dependencies = {nn = {file = ["requirements-nn.txt"]}, tools = {file = ["requirements-tools.txt"]}, pmml = {file = ["requirements-pmml.txt"]}, all = {file = ["requirements-nn.txt", "requirements-tools.txt", "requirements-pmml.txt"]} }
2828

2929
[build-system]
3030
requires = [

requirements-pmml.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
sklearn2pmml >= 0.80
2+
sklearn-pandas >= 2.0

toad/scorecard.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,49 @@
1515
FACTOR_UNKNOWN = 'UNKNOWN'
1616

1717

18+
def _build_numeric_expression(split_points, scores, nan_score=None):
    """Build a nested if-else expression for ExpressionTransformer.

    The result is a Python conditional expression over ``X[0]``. Split points
    are tested in the order given with ``<``: a value below
    ``split_points[i]`` (and not below any earlier split point) gets
    ``scores[i]``; a value that is below none of them gets ``scores[-1]``.
    When ``nan_score`` is given, a leading ``pandas.isnull(X[0])`` test
    returns it before any split point is compared.

    Every number is converted with ``float()`` before it is written, so the
    literal is the shortest string that round-trips the exact double value.
    Formatting a ``numpy.float32`` directly would print a shortened value
    (``0.1``) that is not the threshold actually stored.

    Args:
        split_points (ndarray): split point values
        scores (ndarray): scores array, length = len(split_points) + 1
        nan_score (float|None): score for NaN values

    Returns:
        str: expression string for ExpressionTransformer

    Raises:
        ValueError: if ``scores`` does not have exactly
            ``len(split_points) + 1`` elements, or if a split point is
            NaN or infinite (it would be written as a bare ``nan``/``inf``
            token, which is not a numeric literal).
    """
    thresholds = [float(point) for point in split_points]
    bin_scores = [float(score) for score in scores]

    # A mismatch means bins and scores are misaligned; exporting anyway would
    # silently attach scores to the wrong ranges.
    if len(bin_scores) != len(thresholds) + 1:
        raise ValueError(
            'scores must have exactly len(split_points) + 1 elements, '
            f'got {len(bin_scores)} scores for {len(thresholds)} split points'
        )

    if not np.all(np.isfinite(thresholds)):
        raise ValueError(
            f'split_points must be finite numbers, got {thresholds}'
        )

    # One "<score> if <condition>" head per branch; the last score is the
    # catch-all that closes the chain.
    branches = []
    if nan_score is not None:
        branches.append(f'{float(nan_score)!r} if pandas.isnull(X[0])')
    branches.extend(
        f'{score!r} if X[0] < {threshold!r}'
        for threshold, score in zip(thresholds, bin_scores)
    )

    if not branches:
        return repr(bin_scores[-1])

    # a if c1 else (b if c2 else (c if c3 else d)): every branch after the
    # first opens one parenthesis, all of them are closed at the end.
    return (
        ' else ('.join(branches)
        + f' else {bin_scores[-1]!r}'
        + ')' * (len(branches) - 1)
    )
60+
1861

1962
class ScoreCard(BaseEstimator, RulesMixin, BinsMixin):
2063
def __init__(self, pdo = 60, rate = 2, base_odds = 35, base_score = 750,
@@ -377,6 +420,88 @@ def after_export(self, card, to_frame = False, to_json = None, to_csv = None, **
377420
return card
378421

379422

423+
def card2pmml(self, pmml_path='scorecard.pmml', debug=False):
    """Export scorecard to PMML format.

    Each feature in ``self.rules`` becomes one column transformer: a
    ``LookupTransformer`` when its bins are not numeric, otherwise an
    ``ExpressionTransformer`` built by ``_build_numeric_expression``. The
    per-feature scores are then combined by a ``LinearRegression`` whose
    coefficients are all ones and whose intercept is zero, and the resulting
    pipeline is passed to ``sklearn2pmml``.

    The rules check runs before the optional imports so that an empty
    scorecard is reported as such whether or not the PMML extras are
    installed.

    Args:
        pmml_path (str): path to write the PMML file
        debug (bool): if True, print debug info from sklearn2pmml

    Raises:
        RuntimeError: if the scorecard has no rules.
        ImportError: if one of the optional PMML dependencies cannot be
            imported.

    Requires:
        pip install toad[pmml]  (sklearn2pmml >= 0.80, sklearn-pandas >= 2.0)
        Java 11+ runtime
    """
    if not self.rules:
        raise RuntimeError(
            "No scorecard rules found. Call fit() or load() before card2pmml()."
        )

    try:
        from sklearn_pandas import DataFrameMapper
        from sklearn.linear_model import LinearRegression
        from sklearn2pmml import sklearn2pmml, PMMLPipeline
        from sklearn2pmml.preprocessing import LookupTransformer, ExpressionTransformer
    except ImportError as e:
        raise ImportError(
            "card2pmml requires 'sklearn2pmml' and 'sklearn-pandas'. "
            "Install them with: pip install toad[pmml]"
        ) from e

    mapper = []
    for var, rule in self.rules.items():
        bins = rule['bins']
        scores = rule['scores']

        if not np.issubdtype(bins.dtype, np.number):
            # Categorical feature: one lookup entry per listed value; the
            # ELSE_GROUP score becomes the default for everything unlisted.
            mapping = {}
            default_value = 0.0
            for group, score in zip(bins, scores):
                score_f = float(score)
                if isinstance(group, str) and group == self.ELSE_GROUP:
                    default_value = score_f
                elif isinstance(group, (list, np.ndarray)):
                    for val in group:
                        mapping[val] = score_f
                else:
                    mapping[group] = score_f
            transformer = LookupTransformer(
                mapping=mapping, default_value=default_value,
            )
        else:
            # Numeric feature: a trailing NaN in the bins marks a dedicated
            # missing-value bin whose score is the last one.
            has_nan = len(bins) > 0 and np.isnan(bins[-1])
            if has_nan:
                split_points = bins[:-1]
                split_scores = scores[:-1]
                nan_score = float(scores[-1])
            else:
                split_points = bins
                split_scores = scores
                nan_score = None

            expression = _build_numeric_expression(
                split_points, split_scores, nan_score,
            )
            transformer = ExpressionTransformer(expression)

        mapper.append(([var], transformer))

    scorecard_mapper = DataFrameMapper(mapper, df_out=True)

    # The regression is never fitted; its attributes are set by hand so that
    # it simply adds up the per-feature scores.
    feature_names = list(self.rules.keys())
    n_features = len(feature_names)
    lr = LinearRegression(fit_intercept=False)
    lr.coef_ = np.ones(n_features)
    lr.intercept_ = 0.0
    lr.n_features_in_ = n_features
    lr.feature_names_in_ = np.array(feature_names)

    pipeline = PMMLPipeline([
        ('preprocessing', scorecard_mapper),
        ('scorecard', lr),
    ])
    sklearn2pmml(pipeline, pmml_path, with_repr=True, debug=debug)
380505

381506
def _generate_testing_frame(self, maps, size = 'max', mishap = True, gap = 1e-2):
382507
"""

toad/scorecard_test.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pandas as pd
44
from sklearn.linear_model import LogisticRegression
55

6-
from .scorecard import ScoreCard, WOETransformer, Combiner
6+
from .scorecard import ScoreCard, WOETransformer, Combiner, _build_numeric_expression
77

88
np.random.seed(1)
99

@@ -264,3 +264,94 @@ def test_predict_dict():
264264
proba = card.predict(df.iloc[404].to_dict())
265265
assert proba == TEST_SCORE
266266

267+
268+
# --- _build_numeric_expression tests ---
269+
270+
def test_build_numeric_expression_basic():
    """Three split points without a NaN bin give a three-level nested chain."""
    expr = _build_numeric_expression(
        np.array([3.0, 5.0, 8.0]),
        np.array([100, 200, 300, 400]),
    )
    # Pin the whole string: substring checks would still pass if scores were
    # paired with the wrong split point or the parentheses were unbalanced.
    assert expr == (
        '100.0 if X[0] < 3.0 else '
        '(200.0 if X[0] < 5.0 else '
        '(300.0 if X[0] < 8.0 else 400.0))'
    )

    # The expression is plain Python, so the bin boundaries can be checked
    # directly: a value equal to a split point belongs to the upper bin.
    for value, expected in [(2.9, 100.0), (3.0, 200.0), (7.9, 300.0), (8.0, 400.0)]:
        assert eval(expr, {'X': [value]}) == expected
281+
282+
283+
def test_build_numeric_expression_with_nan():
    """The NaN test comes first and the split chain is nested in its else."""
    expr = _build_numeric_expression(
        np.array([3.0, 5.0]),
        np.array([100, 200, 300]),
        nan_score=500.0,
    )
    # Exact match instead of substrings so ordering and nesting are verified.
    assert expr == (
        '500.0 if pandas.isnull(X[0]) else '
        '(100.0 if X[0] < 3.0 else '
        '(200.0 if X[0] < 5.0 else 300.0))'
    )
292+
293+
294+
def test_build_numeric_expression_no_splits():
    """With no split points and no NaN score the expression is the lone score."""
    no_splits = np.array([])
    only_score = np.array([42])
    assert _build_numeric_expression(no_splits, only_score) == '42.0'
297+
298+
299+
def test_build_numeric_expression_no_splits_with_nan():
    """Without split points the expression only separates NaN from the rest."""
    expr = _build_numeric_expression(np.array([]), np.array([42]), nan_score=99.0)
    # Exact match: substring checks could not tell which score goes to NaN.
    assert expr == '99.0 if pandas.isnull(X[0]) else 42.0'
304+
305+
306+
def test_build_numeric_expression_single_split():
    """A single split point gives one flat conditional with no parentheses."""
    expr = _build_numeric_expression(np.array([5.0]), np.array([100, 200]))
    # Exact match: verifies which score sits on which side of the split.
    assert expr == '100.0 if X[0] < 5.0 else 200.0'
311+
312+
313+
# --- card2pmml tests ---
314+
315+
def test_card2pmml_missing_rules():
    """Exporting a freshly constructed ScoreCard raises RuntimeError."""
    empty_card = ScoreCard()
    expected = pytest.raises(RuntimeError, match='No scorecard rules')
    with expected:
        empty_card.card2pmml()
319+
320+
321+
def test_card2pmml_import_error(monkeypatch):
    """A failing optional import surfaces as ImportError with the install hint."""
    import builtins
    original_import = builtins.__import__

    def failing_import(name, *args, **kwargs):
        # Only sklearn_pandas is made unimportable; everything else is
        # delegated to the real import machinery.
        if name != 'sklearn_pandas':
            return original_import(name, *args, **kwargs)
        raise ImportError('No module')

    loaded_card = ScoreCard().load(card_config)
    # Patch after loading so the card itself is built with normal imports.
    monkeypatch.setattr(builtins, '__import__', failing_import)
    with pytest.raises(ImportError, match='pip install toad\\[pmml\\]'):
        loaded_card.card2pmml()
335+
336+
337+
@pytest.fixture
def pmml_deps():
    """Skip the requesting test unless the PMML packages and `java` are available."""
    for module_name in ('sklearn2pmml', 'sklearn_pandas'):
        pytest.importorskip(module_name)

    from shutil import which
    java_path = which('java')
    if java_path is None:
        pytest.skip('Java 11+ required')
344+
345+
346+
def test_card2pmml_from_config(pmml_deps, tmp_path):
    """A card loaded from card_config exports to a non-empty PMML file."""
    target = tmp_path / 'test_config.pmml'
    loaded_card = ScoreCard().load(card_config)
    loaded_card.card2pmml(str(target))
    written_bytes = (tmp_path / 'test_config.pmml').stat().st_size
    assert written_bytes > 0
351+
352+
353+
def test_card2pmml_from_fitted(pmml_deps, tmp_path):
    """The module-level `card` exports to a non-empty PMML file."""
    target = tmp_path / 'test_fitted.pmml'
    card.card2pmml(str(target))
    written_bytes = (tmp_path / 'test_fitted.pmml').stat().st_size
    assert written_bytes > 0
357+

0 commit comments

Comments
 (0)