Skip to content
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ examples/
├── hpo/ # Hyperparameter optimization
├── interpretability/ # SHAP and feature selection
├── many_class/ # Handling many classes
├── multioutput/ # Multi-output regression and classification
├── post_hoc_ensembles/ # Post-hoc ensembles
├── rf_pfn/ # Random Forest + TabPFN combinations
└── unsupervised/ # Unsupervised learning
Expand Down
74 changes: 74 additions & 0 deletions examples/multioutput/multioutput_prediction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Multi-output prediction workflows for TabPFN."""

from __future__ import annotations

import numpy as np
from sklearn.datasets import make_multilabel_classification, make_regression
from sklearn.metrics import r2_score, roc_auc_score
from sklearn.model_selection import train_test_split

from tabpfn_extensions.multioutput import (
TabPFNMultiOutputClassifier,
TabPFNMultiOutputRegressor,
)

# ---------------------------------------------------------------------------
# 1. Multi-output regression with missing features
Comment thread
noahho marked this conversation as resolved.
# ---------------------------------------------------------------------------

X_reg, y_reg = make_regression(
n_samples=120,
n_features=6,
n_targets=2,
n_informative=6,
noise=0.05,
random_state=0,
)
Comment thread
noahho marked this conversation as resolved.

X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
X_reg, y_reg, test_size=0.3, random_state=42
)

regressor = TabPFNMultiOutputRegressor(n_estimators=4)
regressor.fit(X_reg_train, y_reg_train)

reg_predictions = regressor.predict(X_reg_test)
print("Regression predictions shape:", reg_predictions.shape)

r2_per_target = [
r2_score(y_reg_test[:, i], reg_predictions[:, i])
for i in range(reg_predictions.shape[1])
]
Comment thread
noahho marked this conversation as resolved.
print("Regression R2 per target:", r2_per_target)
print(
"Regression average R2:",
r2_score(y_reg_test, reg_predictions, multioutput="uniform_average"),
)

# ---------------------------------------------------------------------------
# 2. Multi-output classification (multi-label) with the same wrapper pattern
# ---------------------------------------------------------------------------

X_clf, y_clf = make_multilabel_classification(
n_samples=150,
n_features=6,
n_classes=3,
n_labels=2,
allow_unlabeled=False,
random_state=1,
)

X_clf = X_clf.astype(np.float32)
Comment thread
noahho marked this conversation as resolved.

X_clf_train, X_clf_test, y_clf_train, y_clf_test = train_test_split(
X_clf, y_clf, test_size=0.3, random_state=42
)

classifier = TabPFNMultiOutputClassifier(n_estimators=4)
classifier.fit(X_clf_train, y_clf_train)

clf_predictions = classifier.predict_proba(X_clf_test)
print("Classification predictions shape:", clf_predictions.shape)

micro_roc_auc = roc_auc_score(y_clf_test, clf_predictions, average="micro")
print("Classification micro-ROC-AUC:", micro_roc_auc)
3 changes: 3 additions & 0 deletions src/tabpfn_extensions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .embedding import TabPFNEmbedding
from .hpo import TunedTabPFNClassifier, TunedTabPFNRegressor
from .many_class import ManyClassClassifier
from .multioutput import TabPFNMultiOutputClassifier, TabPFNMultiOutputRegressor
from .post_hoc_ensembles import AutoTabPFNClassifier, AutoTabPFNRegressor
from .unsupervised import TabPFNUnsupervisedModel

Expand All @@ -26,6 +27,8 @@
"is_tabpfn",
"TabPFNEmbedding",
"ManyClassClassifier",
"TabPFNMultiOutputRegressor",
"TabPFNMultiOutputClassifier",
"TabPFNUnsupervisedModel",
"AutoTabPFNClassifier",
"AutoTabPFNRegressor",
Expand Down
75 changes: 75 additions & 0 deletions src/tabpfn_extensions/multioutput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Wrapper for multi-output learning with TabPFN."""

from __future__ import annotations

from typing import Any, TypeVar

from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor

from .utils import TabPFNClassifier, TabPFNRegressor

_EstimatorT = TypeVar("_EstimatorT")


class _TabPFNMultiOutputMixin:
"""Shared initialisation logic for TabPFN multi-output wrappers."""

_tabpfn_estimator_cls: type[_EstimatorT]

def __init__(
self,
estimator: _EstimatorT | None = None,
*,
n_preprocessing_jobs: int | None = None,
**tabpfn_params: Any,
) -> None:
if estimator is not None and tabpfn_params:
msg = "Provide either a custom estimator or tabpfn_params, not both."
raise ValueError(msg)

self._estimator_is_default = estimator is None
self.tabpfn_params = dict(tabpfn_params) if self._estimator_is_default else {}

if self._estimator_is_default:
estimator = self._tabpfn_estimator_cls(**tabpfn_params)

super().__init__(estimator=estimator, n_jobs=n_jobs)

Check failure on line 36 in src/tabpfn_extensions/multioutput.py

View workflow job for this annotation

GitHub Actions / Ruff Linting & Formatting

Ruff (F821)

src/tabpfn_extensions/multioutput.py:36:54: F821 Undefined name `n_jobs`

def get_params(
self, deep: bool = True
) -> dict[str, Any]: # pragma: no cover - delegating to sklearn
"""Return parameters for this estimator with TabPFN kwargs included."""
params = super().get_params(deep=deep)
if self._estimator_is_default:
params.pop("estimator", None)
params.update(self.tabpfn_params)
return params

def set_params(
self, **params: Any
) -> _TabPFNMultiOutputMixin: # pragma: no cover - delegating to sklearn
"""Update parameters while keeping TabPFN kwargs in sync."""
if getattr(self, "_estimator_is_default", False):
tabpfn_updates: dict[str, Any] = {}
for key in list(params):
if key in {"estimator", "n_jobs"}:
continue
tabpfn_updates[key] = params.pop(key)

if tabpfn_updates:
self.tabpfn_params.update(tabpfn_updates)
self.estimator = self._tabpfn_estimator_cls(**self.tabpfn_params)

return super().set_params(**params)


class TabPFNMultiOutputRegressor(_TabPFNMultiOutputMixin, MultiOutputRegressor):
    """A lightweight multi-output wrapper around :class:`TabPFNRegressor`.

    Fits one TabPFN regressor per target column via sklearn's
    ``MultiOutputRegressor``.  Constructor kwargs are forwarded to the
    internally created :class:`TabPFNRegressor` (see the mixin).
    """

    # Estimator class instantiated by the mixin when no estimator is given.
    _tabpfn_estimator_cls = TabPFNRegressor


class TabPFNMultiOutputClassifier(_TabPFNMultiOutputMixin, MultiOutputClassifier):
    """A lightweight multi-output wrapper around :class:`TabPFNClassifier`.

    Fits one TabPFN classifier per output column via sklearn's
    ``MultiOutputClassifier`` (e.g. multi-label problems).  Constructor
    kwargs are forwarded to the internally created
    :class:`TabPFNClassifier` (see the mixin).
    """

    # Estimator class instantiated by the mixin when no estimator is given.
    _tabpfn_estimator_cls = TabPFNClassifier
44 changes: 44 additions & 0 deletions tests/test_multioutput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Tests for the multi-output TabPFN wrappers."""

from __future__ import annotations

import numpy as np
import pytest
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score

from tabpfn_extensions.multioutput import (
TabPFNMultiOutputRegressor,
)


@pytest.mark.client_compatible
@pytest.mark.local_compatible
def test_multioutput_regression(backend):
    """TabPFN kwargs should be cloneable when estimator is created internally."""
    X, y = make_regression(
        n_samples=30,
        n_features=4,
        n_targets=2,
        n_informative=4,
        noise=0.2,
        random_state=1,
    )

    # Pass a TabPFN kwarg so the clone below actually exercises the kwargs
    # round-trip promised by the docstring (the previous version used the
    # default constructor and never tested kwarg cloning).
    model = TabPFNMultiOutputRegressor(n_estimators=2)

    model.fit(X, y)
    predictions = model.predict(X)

    assert predictions.shape == y.shape
    assert np.isfinite(predictions).all()

    cloned_model = clone(model)
    # The internally created estimator's kwargs must survive cloning.
    assert cloned_model.get_params()["n_estimators"] == 2
    cloned_model.fit(X, y)
    cloned_predictions = cloned_model.predict(X)

    assert cloned_predictions.shape == y.shape
    assert np.isfinite(cloned_predictions).all()

    cloned_score = r2_score(y, cloned_predictions, multioutput="uniform_average")
    assert cloned_score > 0.2
Comment thread
noahho marked this conversation as resolved.
Loading