Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion examples/pcovc/KPCovC_Comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

random_state = 0
n_components = 2
scale_z = True

# %%
#
Expand Down Expand Up @@ -85,7 +86,7 @@
# Both PCA and PCovC fail to produce linearly separable latent space
# maps. We will need a kernel method to effectively separate the moon classes.

mixing = 0.10
mixing = 0.5
alpha_d = 0.5
alpha_p = 0.4

Expand All @@ -95,6 +96,7 @@
n_components=n_components,
random_state=random_state,
mixing=mixing,
scale_z=scale_z,
classifier=LinearSVC(),
): "PCovC",
}
Expand Down Expand Up @@ -138,6 +140,7 @@
random_state=random_state,
mixing=mixing,
center=center,
scale_z=scale_z,
**kernel_params,
): {"title": "Kernel PCovC", "eps": 2},
}
Expand Down Expand Up @@ -220,6 +223,7 @@
mixing=mixing,
classifier=model,
center=center,
scale_z=scale_z,
**models[model]["kernel_params"],
)
t_kpcovc_train = kpcovc.fit_transform(X_train_scaled, y_train)
Expand Down
6 changes: 4 additions & 2 deletions examples/pcovc/KPCovC_Hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@
fig, axs = plt.subplots(2, len(kernels), figsize=(len(kernels) * 4, 8))

center = True
mixing = 0.10
mixing = 0.5
scale_z = True

for i, kernel in enumerate(kernels):
kpca = KernelPCA(
Expand All @@ -83,6 +84,7 @@
random_state=random_state,
**kernel_params.get(kernel, {}),
center=center,
scale_z=scale_z,
)
t_kpcovc = kpcovc.fit_transform(X_scaled, y)

Expand Down Expand Up @@ -118,7 +120,7 @@
kpcovc = KernelPCovC(
n_components=n_components,
random_state=random_state,
mixing=mixing,
mixing=0.1,
center=center,
kernel="rbf",
gamma=gamma,
Expand Down
2 changes: 2 additions & 0 deletions examples/pcovc/PCovC_Hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
n_components=n_components,
random_state=random_state,
classifier=LogisticRegressionCV(),
scale_z=True,
)

pcovc.fit(X_scaled, y)
Expand Down Expand Up @@ -120,6 +121,7 @@
n_components=n_components,
random_state=random_state,
classifier=model,
scale_z=True,
)

pcovc.fit(X_scaled, y)
Expand Down
44 changes: 42 additions & 2 deletions src/skmatter/decomposition/_kernel_pcovc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
import numpy as np

from sklearn import clone
Expand All @@ -16,7 +17,7 @@
from sklearn.linear_model._base import LinearClassifierMixin
from sklearn.utils.multiclass import check_classification_targets, type_of_target

from skmatter.preprocessing import KernelNormalizer
from skmatter.preprocessing import KernelNormalizer, StandardFlexibleScaler
from skmatter.utils import check_cl_fit
from skmatter.decomposition import _BaseKPCov

Expand Down Expand Up @@ -86,6 +87,9 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
If None, ``sklearn.linear_model.LogisticRegression()``
is used as the classifier.

scale_z: bool, default=False
Whether to scale Z prior to eigendecomposition.

kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear"
Kernel.

Expand Down Expand Up @@ -116,6 +120,14 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
and for matrix inversions.
Must be of range [0.0, infinity).

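z_mean_tol: float, default=1e-12
Tolerance for the column means of Z. A warning is issued during fitting
if the absolute value of any column mean of Z exceeds this value.
Must be of range [0.0, infinity).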

z_var_tol: float, default=1.5
Tolerance for the column variances of Z. A warning is issued during fitting
if any column variance of Z exceeds this value.
Must be of range [0.0, infinity).

n_jobs : int, default=None
The number of parallel jobs to run.
:obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
Expand Down Expand Up @@ -167,14 +179,17 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
The data used to fit the model. This attribute is used to build kernels
from new data.

scale_z: bool
Whether Z is being scaled prior to eigendecomposition.

Examples
--------
>>> import numpy as np
>>> from skmatter.decomposition import KernelPCovC
>>> from sklearn.preprocessing import StandardScaler
>>> X = np.array([[-2, 3, -1, 0], [2, 0, -3, 1], [3, 0, -1, 3], [2, -2, 1, 0]])
>>> X = StandardScaler().fit_transform(X)
>>> Y = np.array([[2], [0], [1], [2]])
>>> Y = np.array([2, 0, 1, 2])
>>> kpcovc = KernelPCovC(
... mixing=0.1,
... n_components=2,
Expand All @@ -200,6 +215,7 @@ def __init__(
n_components=None,
svd_solver="auto",
classifier=None,
scale_z=False,
kernel="linear",
gamma=None,
degree=3,
Expand All @@ -208,6 +224,8 @@ def __init__(
center=False,
fit_inverse_transform=False,
tol=1e-12,
z_mean_tol=1e-12,
z_var_tol=1.5,
n_jobs=None,
iterated_power="auto",
random_state=None,
Expand All @@ -229,6 +247,9 @@ def __init__(
fit_inverse_transform=fit_inverse_transform,
)
self.classifier = classifier
self.scale_z = scale_z
self.z_mean_tol = z_mean_tol
self.z_var_tol = z_var_tol

def fit(self, X, Y, W=None):
r"""Fit the model with X and Y.
Expand Down Expand Up @@ -323,6 +344,25 @@ def fit(self, X, Y, W=None):
W = LogisticRegression().fit(K, Y).coef_.T

Z = K @ W
if self.scale_z:
Z = StandardFlexibleScaler().fit_transform(Z)

z_means_ = np.mean(Z, axis=0)
z_vars_ = np.var(Z, axis=0)

if np.max(np.abs(z_means_)) > self.z_mean_tol:
warnings.warn(
"This class does not automatically center Z, and the column means "
"of Z are greater than the supplied tolerance. We recommend scaling "
"Z (and the weights) by setting `scale_z=True`."
)

if np.max(z_vars_) > self.z_var_tol:
warnings.warn(
"This class does not automatically scale Z, and the column variances "
"of Z are greater than the supplied tolerance. We recommend scaling "
"Z (and the weights) by setting `scale_z=True`."
)

self._fit(K, Z, W)

Expand Down
54 changes: 53 additions & 1 deletion src/skmatter/decomposition/_pcovc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from sklearn.utils.validation import check_is_fitted, validate_data
from skmatter.decomposition import _BasePCov
from skmatter.utils import check_cl_fit
from skmatter.preprocessing import StandardFlexibleScaler
import warnings


class PCovC(LinearClassifierMixin, _BasePCov):
Expand Down Expand Up @@ -96,6 +98,14 @@ class PCovC(LinearClassifierMixin, _BasePCov):
Tolerance for singular values computed by svd_solver == 'arpack'.
Must be of range [0.0, infinity).

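z_mean_tol: float, default=1e-12
Tolerance for the column means of Z. A warning is issued during fitting
if the absolute value of any column mean of Z exceeds this value.
Must be of range [0.0, infinity).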

z_var_tol: float, default=1.5
Tolerance for the column variances of Z. A warning is issued during fitting
if any column variance of Z exceeds this value.
Must be of range [0.0, infinity).

space: {'feature', 'sample', 'auto'}, default='auto'
whether to compute the PCovC in ``sample`` or ``feature`` space.
The default is equal to ``sample`` when :math:`{n_{samples} < n_{features}}`
Expand Down Expand Up @@ -123,6 +133,9 @@ class PCovC(LinearClassifierMixin, _BasePCov):
If None, ``sklearn.linear_model.LogisticRegression()``
is used as the classifier.

scale_z: bool, default=False
Whether to scale Z prior to eigendecomposition.

iterated_power : int or 'auto', default='auto'
Number of iterations for the power method computed by
svd_solver == 'randomized'.
Expand All @@ -143,6 +156,14 @@ class PCovC(LinearClassifierMixin, _BasePCov):
Tolerance for singular values computed by svd_solver == 'arpack'.
Must be of range [0.0, infinity).

z_mean_tol: float
Tolerance for the column means of Z.
Must be of range [0.0, infinity).

z_var_tol: float
Tolerance for the column variances of Z.
Must be of range [0.0, infinity).

space: {'feature', 'sample', 'auto'}, default='auto'
whether to compute the PCovC in ``sample`` or ``feature`` space.
The default is equal to ``sample`` when :math:`{n_{samples} < n_{features}}`
Expand Down Expand Up @@ -174,6 +195,9 @@ class PCovC(LinearClassifierMixin, _BasePCov):
the projector, or weights, from the latent-space projection
:math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`

scale_z: bool
Whether Z is being scaled prior to eigendecomposition

explained_variance_ : numpy.ndarray of shape (n_components,)
The amount of variance explained by each of the selected components.
Equal to n_components largest eigenvalues
Expand Down Expand Up @@ -208,8 +232,11 @@ def __init__(
n_components=None,
svd_solver="auto",
tol=1e-12,
z_mean_tol=1e-12,
z_var_tol=1.5,
space="auto",
classifier=None,
scale_z=False,
iterated_power="auto",
random_state=None,
whiten=False,
Expand All @@ -225,6 +252,9 @@ def __init__(
whiten=whiten,
)
self.classifier = classifier
self.scale_z = scale_z
self.z_mean_tol = z_mean_tol
self.z_var_tol = z_var_tol

def fit(self, X, Y, W=None):
r"""Fit the model with X and Y.
Expand Down Expand Up @@ -291,7 +321,7 @@ def fit(self, X, Y, W=None):
classifier = self.classifier

self.z_classifier_ = check_cl_fit(classifier, X, Y)
W = self.z_classifier_.coef_.T
W = self.z_classifier_.coef_.T.copy()

else:
# If precomputed, use default classifier to predict Y from T
Expand All @@ -301,6 +331,28 @@ def fit(self, X, Y, W=None):

Z = X @ W

if self.scale_z:
z_scaler = StandardFlexibleScaler().fit(Z)
Z = z_scaler.transform(Z)
W /= z_scaler.scale_.reshape(1, -1)

z_means_ = np.mean(Z, axis=0)
z_vars_ = np.var(Z, axis=0)

if np.max(np.abs(z_means_)) > self.z_mean_tol:
warnings.warn(
"This class does not automatically center Z, and the column means "
"of Z are greater than the supplied tolerance. We recommend scaling "
"Z (and the weights) by setting `scale_z=True`."
)

if np.max(z_vars_) > self.z_var_tol:
warnings.warn(
"This class does not automatically scale Z, and the column variances "
"of Z are greater than the supplied tolerance. We recommend scaling "
"Z (and the weights) by setting `scale_z=True`."
)

if self.space_ == "feature":
self._fit_feature_space(X, Y, Z)
else:
Expand Down
41 changes: 41 additions & 0 deletions tests/test_kernel_pcovc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
import warnings

import numpy as np
from sklearn import exceptions
Expand Down Expand Up @@ -34,10 +35,12 @@ def __init__(self, *args, **kwargs):
lambda mixing=0.5,
classifier=LogisticRegression(),
n_components=4,
scale_z=True,
**kwargs: KernelPCovC(
mixing=mixing,
classifier=classifier,
n_components=n_components,
scale_z=scale_z,
svd_solver=kwargs.pop("svd_solver", "full"),
**kwargs,
)
Expand Down Expand Up @@ -327,6 +330,44 @@ def test_precomputed_classification(self):
self.assertTrue(np.linalg.norm(t3 - t2) < self.error_tol)
self.assertTrue(np.linalg.norm(t3 - t1) < self.error_tol)

def test_scale_z_parameter(self):
    """Check that changing ``scale_z`` changes the eigendecomposition.

    Two models that differ only in ``scale_z`` are fitted on the same data
    and their ``pkt_`` projectors are required to differ.
    """
    kpcovc_scaled = self.model(scale_z=True)
    kpcovc_scaled.fit(self.X, self.Y)

    kpcovc_unscaled = self.model(scale_z=False)
    kpcovc_unscaled.fit(self.X, self.Y)

    # Use a unittest assertion instead of a bare ``assert``: it is not
    # stripped under ``python -O`` and matches the other tests in this file.
    self.assertFalse(np.allclose(kpcovc_scaled.pkt_, kpcovc_unscaled.pkt_))

def test_z_scaling(self):
    """Check that the Z warnings fire only when Z is left unscaled.

    With ``scale_z=True`` fitting must emit neither the "center Z" nor the
    "scale Z" warning. With ``scale_z=False`` and both tolerances set to
    zero, fitting must emit both.
    """
    mean_msg = (
        "This class does not automatically center Z, and the column means "
        "of Z are greater than the supplied tolerance. We recommend scaling "
        "Z (and the weights) by setting `scale_z=True`."
    )
    var_msg = (
        "This class does not automatically scale Z, and the column variances "
        "of Z are greater than the supplied tolerance. We recommend scaling "
        "Z (and the weights) by setting `scale_z=True`."
    )

    kpcovc = self.model(n_components=2, scale_z=True)

    with warnings.catch_warnings(record=True) as caught:
        # The filter has to be installed *before* fitting; installing it
        # afterwards (as was done previously) lets every warning pass
        # unnoticed and the check is vacuous.
        warnings.simplefilter("always")
        kpcovc.fit(self.X, self.Y)

    messages = [str(w.message) for w in caught]
    # Only the Z warnings are checked so that unrelated warnings raised by
    # dependencies during fitting do not make this test fail.
    self.assertNotIn(mean_msg, messages)
    self.assertNotIn(var_msg, messages)

    kpcovc = self.model(n_components=2, scale_z=False, z_mean_tol=0, z_var_tol=0)

    with warnings.catch_warnings(record=True) as caught:
        # "always" defeats the once-per-location warning registry, which
        # could otherwise hide a warning already triggered by another test.
        warnings.simplefilter("always")
        kpcovc.fit(self.X, self.Y)

    messages = [str(w.message) for w in caught]
    # Membership rather than positional checks: other warnings may be
    # recorded before or between the two Z warnings.
    self.assertIn(mean_msg, messages)
    self.assertIn(var_msg, messages)


class KernelTests(KernelPCovCBaseTest):
def test_kernel_types(self):
Expand Down
Loading
Loading