From 316fe305983035820f0db737c655fba0b71f3909 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna Date: Tue, 28 Oct 2025 16:11:52 +0100 Subject: [PATCH 1/8] Improve docs, small refactor of measures --- .../metrics/_reconstruction_measures.py | 259 +++++++++--------- 1 file changed, 124 insertions(+), 135 deletions(-) diff --git a/src/skmatter/metrics/_reconstruction_measures.py b/src/skmatter/metrics/_reconstruction_measures.py index 22f3dba51..7f3112a51 100644 --- a/src/skmatter/metrics/_reconstruction_measures.py +++ b/src/skmatter/metrics/_reconstruction_measures.py @@ -44,46 +44,28 @@ def pointwise_global_reconstruction_error( X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``test_idx`` is used. If ``train_size`` is - also None, 2-fold split is taken. + Array of indices used for training. If None, the complement of the ``test_idx`` + is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``train_idx`` is used. If ``test_size`` is - also None, 2-fold split is taken. - scaler : object implementing fit/transfom - Scales the X and Y before computing the reconstruction measure. - The default value scales the features such that the reconstruction - measure on the training set is upper bounded to 1. - estimator : object implementing fit/predict, default=None - Sklearn estimator used to reconstruct features/samples. + Array of indices used for testing. If None, the complement of the ``train_idx`` + is used. If ``test_size`` is also None, 2-fold split is taken. + scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` + Scales X and Y before computing the reconstruction measure. 
The default value + scales the features such that the reconstruction measure on the training set is + upper bounded to 1. + estimator : object implementing fit/predict, default=``Ridge2FoldCV`` + Sklearn estimator used to reconstruct test features/samples. Returns ------- pointwise_global_reconstruction_error : numpy.ndarray - The global reconstruction error for each sample/point + The global reconstruction error for each test sample/point. """ - ( - train_idx, - test_idx, - scaler, - estimator, - ) = check_global_reconstruction_measures_input( + train_idx, test_idx, scaler, estimator = check_global_reconstruction_measures_input( X, Y, train_idx, test_idx, scaler, estimator ) - X_train, X_test, Y_train, Y_test = ( - X[train_idx], - X[test_idx], - Y[train_idx], - Y[test_idx], - ) - scaler.fit(X_train) - X_train = scaler.transform(X_train) - X_test = scaler.transform(X_test) - scaler.fit(Y_train) - Y_train = scaler.transform(Y_train) - Y_test = scaler.transform(Y_test) + X_train, X_test, Y_train, Y_test = _prepare_data(X, Y, train_idx, test_idx, scaler) estimator.fit(X_train, Y_train) @@ -120,27 +102,25 @@ def global_reconstruction_error( Parameters ---------- X : numpy.ndarray of shape (n_samples, X_n_features) - Source data which reconstructs target Y. - For feature reconstruction of Y using X use input shape (samples, features). - For sample reconstruction of Y using X use input shape (features, samples). + Source data which reconstructs target Y. For feature reconstruction of Y using X + use input shape (samples, features). For sample reconstruction of Y using X use + input shape (features, samples). Y : numpy.ndarray of shape (n_samples, Y_n_targets) - Target data which is reconstructed with X. - For feature reconstruction of Y using X use input shape (samples, features). - For sample reconstruction of Y using X use input shape (features, samples). + Target data which is reconstructed with X. 
For feature reconstruction of Y using + X use input shape (samples, features). For sample reconstruction of Y using X + use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``test_idx`` is used. If ``train_size`` is - also None, 2-fold split is taken. + Array of indices used for training. If None, the complement of the ``test_idx`` + is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``train_idx`` is used. If ``test_size`` is - also None, 2-fold split is taken. - scaler : object implementing fit/transfom - Scales the X and Y before computing the reconstruction measure. - The default value scales the features such that the reconstruction - measure on the training set is upper bounded to 1. - estimator : object implementing fit/predict, default=None - Sklearn estimator used to reconstruct features/samples. + Array of indices used for testing. If None, the complement of the ``train_idx`` + is used. If ``test_size`` is also None, 2-fold split is taken. + scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` + Scales X and Y before computing the reconstruction measure. The default value + scales the features such that the reconstruction measure on the training set is + upper bounded to 1. + estimator : object implementing fit/predict, default=``Ridge2FoldCV`` + Sklearn estimator used to reconstruct test features/samples. Returns ------- @@ -201,46 +181,28 @@ def pointwise_global_reconstruction_distortion( For feature reconstruction of Y using X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). 
train_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``test_idx`` is used. If ``train_size`` is - also None, 2-fold split is taken. + Array of indices used for training. If None, the complement of the ``test_idx`` + is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``train_idx`` is used. If ``test_size`` is - also None, 2-fold split is taken. - scaler : object implementing fit/transfom - Scales the X and Y before computing the reconstruction measure. - The default value scales the features such that the reconstruction - measure on the training set is upper bounded to 1. - estimator : object implementing fit/predict, default=None - Sklearn estimator used to reconstruct features/samples. + Array of indices used for testing. If None, the complement of the ``train_idx`` + is used. If ``test_size`` is also None, 2-fold split is taken. + scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` + Scales X and Y before computing the reconstruction measure. The default value + scales the features such that the reconstruction measure on the training set is + upper bounded to 1. + estimator : object implementing fit/predict, default=``Ridge2FoldCV`` + Sklearn estimator used to reconstruct test features/samples. 
Returns ------- pointwise_global_reconstruction_distortion : ndarray The global reconstruction distortion for each sample/point """ - ( - train_idx, - test_idx, - scaler, - estimator, - ) = check_global_reconstruction_measures_input( + train_idx, test_idx, scaler, estimator = check_global_reconstruction_measures_input( X, Y, train_idx, test_idx, scaler, estimator ) - X_train, X_test, Y_train, Y_test = ( - X[train_idx], - X[test_idx], - Y[train_idx], - Y[test_idx], - ) - scaler.fit(X_train) - X_train = scaler.transform(X_train) - X_test = scaler.transform(X_test) - scaler.fit(Y_train) - Y_train = scaler.transform(Y_train) - Y_test = scaler.transform(Y_test) + X_train, X_test, Y_train, _Y_test = _prepare_data(X, Y, train_idx, test_idx, scaler) predictions_Y_test = estimator.fit(X_train, Y_train).predict(X_test) orthogonal_predictions_Y_test = ( @@ -291,19 +253,17 @@ def global_reconstruction_distortion( For feature reconstruction of Y using X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``test_idx`` is used. If ``train_size`` is - also None, 2-fold split is taken. + Array of indices used for training. If None, the complement of the ``test_idx`` + is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``train_idx`` is used. If ``test_size`` is - also None, 2-fold split is taken. - scaler : object implementing fit/transfom - Scales the X and Y before computing the reconstruction measure. - The default value scales the features such that the reconstruction - measure on the training set is upper bounded to 1. - estimator : object implementing fit/predict, default=None - Sklearn estimator used to reconstruct features/samples. 
+ Array of indices used for testing. If None, the complement of the ``train_idx`` + is used. If ``test_size`` is also None, 2-fold split is taken. + scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` + Scales X and Y before computing the reconstruction measure. The default value + scales the features such that the reconstruction measure on the training set is + upper bounded to 1. + estimator : object implementing fit/predict, default=``Ridge2FoldCV`` + Sklearn estimator used to reconstruct test features/samples. Returns ------- @@ -373,19 +333,23 @@ def pointwise_local_reconstruction_error( Number of neighbour points used to compute the local reconstruction weight for each sample/point. train_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``test_idx`` is used. If ``train_size`` is - also None, 2-fold split is taken. + Array of indices used for training. If None, the complement of the ``test_idx`` + is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``train_idx`` is used. If ``test_size`` is - also None, 2-fold split is taken. - scaler : object implementing fit/transfom - Scales the X and Y before computing the reconstruction measure. - The default value scales the features such that the reconstruction - measure on the training set is upper bounded to 1. - estimator : object implementing fit/predict, default=None - Sklearn estimator used to reconstruct features/samples. + Array of indices used for testing. If None, the complement of the ``train_idx`` + is used. If ``test_size`` is also None, 2-fold split is taken. + scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` + Scales X and Y before computing the reconstruction measure. 
The default value + scales the features such that the reconstruction measure on the training set is + upper bounded to 1. + estimator : object implementing fit/predict, default=``Ridge2FoldCV`` + Sklearn estimator used to reconstruct test features/samples. + n_jobs : int, default=None + The number of CPUs to use to do the computation. + :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See + `n_jobs glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`_ + for more details. Returns ------- @@ -393,27 +357,11 @@ def pointwise_local_reconstruction_error( The local reconstruction error for each sample/point """ - ( - train_idx, - test_idx, - scaler, - estimator, - ) = check_local_reconstruction_measures_input( + train_idx, test_idx, scaler, estimator = check_local_reconstruction_measures_input( X, Y, n_local_points, train_idx, test_idx, scaler, estimator ) - X_train, X_test, Y_train, Y_test = ( - X[train_idx], - X[test_idx], - Y[train_idx], - Y[test_idx], - ) - scaler.fit(X_train) - X_train = scaler.transform(X_train) - X_test = scaler.transform(X_test).astype(X_train.dtype) - scaler.fit(Y_train) - Y_train = scaler.transform(Y_train) - Y_test = scaler.transform(Y_test) + X_train, X_test, Y_train, Y_test = _prepare_data(X, Y, train_idx, test_idx, scaler) squared_dist = ( np.sum(X_train**2, axis=1) @@ -496,19 +444,23 @@ def local_reconstruction_error( Number of neighbour points used to compute the local reconstruction weight for each sample/point. train_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``test_idx`` is used. If ``train_size`` is - also None, 2-fold split is taken. + Array of indices used for training. If None, the complement of the ``test_idx`` + is used. If ``train_size`` is also None, 2-fold split is taken. 
test_idx : numpy.ndarray, dtype=int, default=None - array of indices used for training, if None, - If None, the complement of the ``train_idx`` is used. If ``test_size`` is - also None, 2-fold split is taken. - scaler : object implementing fit/transfom - Scales the X and Y before computing the reconstruction measure. - The default value scales the features such that the reconstruction - measure on the training set is upper bounded to 1. - estimator : object implementing fit/predict, default=None - Sklearn estimator used to reconstruct features/samples. + Array of indices used for testing. If None, the complement of the ``train_idx`` + is used. If ``test_size`` is also None, 2-fold split is taken. + scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` + Scales X and Y before computing the reconstruction measure. The default value + scales the features such that the reconstruction measure on the training set is + upper bounded to 1. + estimator : object implementing fit/predict, default=``Ridge2FoldCV`` + Sklearn estimator used to reconstruct test features/samples. + n_jobs : int, default=None + The number of CPUs to use to do the computation. + :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See + `n_jobs glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`_ + for more details. 
Returns ------- @@ -534,7 +486,11 @@ def check_global_reconstruction_measures_input( X, Y, train_idx, test_idx, scaler, estimator ): """Returns default reconstruction measure inputs for all None parameters""" - assert len(X) == len(Y) + if X.shape[0] != Y.shape[0]: + raise ValueError( + f"First dimension of X ({X.shape[0]}) and Y ({Y.shape[0]}) must match" + ) + if (train_idx is None) and (test_idx is None): train_idx, test_idx = train_test_split( np.arange(len(X)), @@ -562,6 +518,7 @@ def check_global_reconstruction_measures_input( scoring="neg_root_mean_squared_error", n_jobs=1, ) + return train_idx, test_idx, scaler, estimator @@ -570,7 +527,39 @@ def check_local_reconstruction_measures_input( ): """Returns default reconstruction measure inputs for all None parameters""" # only needs to check one extra parameter - assert len(X) >= n_local_points + if len(X) < n_local_points: + raise ValueError( + f"X has {len(X)} samples but n_local_points={n_local_points}. " + "Must have at least n_local_points samples" + ) + return check_global_reconstruction_measures_input( X, Y, train_idx, test_idx, scaler, estimator ) + + +def _prepare_data(X, Y, train_idx, test_idx, scaler): + """ + Split and scale data for reconstruction measures + + Parameters + ---------- + X, Y : array-like + Input data + train_idx, test_idx : array-like + Indices for train/test split + scaler : object + Fitted scaler + """ + X_train, X_test = X[train_idx], X[test_idx] + Y_train, Y_test = Y[train_idx], Y[test_idx] + + scaler.fit(X_train) + X_train_scaled = scaler.transform(X_train) + X_test_scaled = scaler.transform(X_test) + + scaler.fit(Y_train) + Y_train_scaled = scaler.transform(Y_train) + Y_test_scaled = scaler.transform(Y_test) + + return X_train_scaled, X_test_scaled, Y_train_scaled, Y_test_scaled From c5298414303886c00e3b3d6d53b532575f42a3f1 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna <71195115+sofiia-chorna@users.noreply.github.com> Date: Wed, 29 Oct 2025 12:05:17 +0100 Subject: [PATCH 2/8] 
Update src/skmatter/metrics/_reconstruction_measures.py Co-authored-by: Philip Loche --- src/skmatter/metrics/_reconstruction_measures.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/skmatter/metrics/_reconstruction_measures.py b/src/skmatter/metrics/_reconstruction_measures.py index 7f3112a51..550f11339 100644 --- a/src/skmatter/metrics/_reconstruction_measures.py +++ b/src/skmatter/metrics/_reconstruction_measures.py @@ -539,18 +539,7 @@ def check_local_reconstruction_measures_input( def _prepare_data(X, Y, train_idx, test_idx, scaler): - """ - Split and scale data for reconstruction measures - - Parameters - ---------- - X, Y : array-like - Input data - train_idx, test_idx : array-like - Indices for train/test split - scaler : object - Fitted scaler - """ + """Split and scale data for reconstruction measures""" X_train, X_test = X[train_idx], X[test_idx] Y_train, Y_test = Y[train_idx], Y[test_idx] From 73d706c33688f615f1862f4c77582418c44412d2 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna <71195115+sofiia-chorna@users.noreply.github.com> Date: Wed, 29 Oct 2025 12:24:18 +0100 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: Philip Loche --- .../metrics/_reconstruction_measures.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/skmatter/metrics/_reconstruction_measures.py b/src/skmatter/metrics/_reconstruction_measures.py index 550f11339..74a5d29c6 100644 --- a/src/skmatter/metrics/_reconstruction_measures.py +++ b/src/skmatter/metrics/_reconstruction_measures.py @@ -44,11 +44,11 @@ def pointwise_global_reconstruction_error( X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If None, the complement of the ``test_idx`` + Array of indices used for training. If ``None``, the complement of the ``test_idx`` is used. 
If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If None, the complement of the ``train_idx`` - is used. If ``test_size`` is also None, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the ``train_idx`` + is used. If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is @@ -111,7 +111,7 @@ def global_reconstruction_error( use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also None, 2-fold split is taken. + is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None Array of indices used for testing. If None, the complement of the ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. @@ -253,10 +253,10 @@ def global_reconstruction_distortion( For feature reconstruction of Y using X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If None, the complement of the ``test_idx`` + Array of indices used for training. If ``None``, the complement of the ``test_idx`` is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If None, the complement of the ``train_idx`` + Array of indices used for testing. If ``None``, the complement of the ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. 
scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value @@ -334,9 +334,9 @@ def pointwise_local_reconstruction_error( each sample/point. train_idx : numpy.ndarray, dtype=int, default=None Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also None, 2-fold split is taken. + is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If None, the complement of the ``train_idx`` + Array of indices used for testing. If ``None``, the complement of the ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value @@ -445,7 +445,7 @@ def local_reconstruction_error( each sample/point. train_idx : numpy.ndarray, dtype=int, default=None Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also None, 2-fold split is taken. + is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None Array of indices used for testing. If None, the complement of the ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. 
From 6fdf0463c8c3643a3189d0fc9500eb7e9a896942 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna Date: Wed, 29 Oct 2025 12:35:48 +0100 Subject: [PATCH 4/8] Fix doc line length --- .../metrics/_reconstruction_measures.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/skmatter/metrics/_reconstruction_measures.py b/src/skmatter/metrics/_reconstruction_measures.py index 74a5d29c6..7b8ca94cc 100644 --- a/src/skmatter/metrics/_reconstruction_measures.py +++ b/src/skmatter/metrics/_reconstruction_measures.py @@ -44,11 +44,11 @@ def pointwise_global_reconstruction_error( X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If ``None``, the complement of the ``test_idx`` - is used. If ``train_size`` is also None, 2-fold split is taken. + Array of indices used for training. If ``None``, the complement of the + ``test_idx`` is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If ``None``, the complement of the ``train_idx`` - is used. If ``test_size`` is also ``None``, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the + ``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is @@ -253,11 +253,11 @@ def global_reconstruction_distortion( For feature reconstruction of Y using X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. 
If ``None``, the complement of the ``test_idx`` - is used. If ``train_size`` is also None, 2-fold split is taken. + Array of indices used for training. If ``None``, the complement of the + ``test_idx`` is used. If ``train_size`` is also None, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If ``None``, the complement of the ``train_idx`` - is used. If ``test_size`` is also None, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the + ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is @@ -336,8 +336,8 @@ def pointwise_local_reconstruction_error( Array of indices used for training. If None, the complement of the ``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If ``None``, the complement of the ``train_idx`` - is used. If ``test_size`` is also None, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the + ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. 
The default value scales the features such that the reconstruction measure on the training set is From 9f6d88c89cb8a4d120c5f96dc7b99b3c0f45f1c2 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna Date: Wed, 29 Oct 2025 12:50:24 +0100 Subject: [PATCH 5/8] Highlight more Nones --- .../metrics/_reconstruction_measures.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/skmatter/metrics/_reconstruction_measures.py b/src/skmatter/metrics/_reconstruction_measures.py index 7b8ca94cc..0988562ac 100644 --- a/src/skmatter/metrics/_reconstruction_measures.py +++ b/src/skmatter/metrics/_reconstruction_measures.py @@ -110,11 +110,11 @@ def global_reconstruction_error( X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also ``None``, 2-fold split is taken. + Array of indices used for training. If ``None``, the complement of the + ``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If None, the complement of the ``train_idx`` - is used. If ``test_size`` is also None, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the + ``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is @@ -181,11 +181,11 @@ def pointwise_global_reconstruction_distortion( For feature reconstruction of Y using X use input shape (samples, features). For sample reconstruction of Y using X use input shape (features, samples). 
train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also None, 2-fold split is taken. + Array of indices used for training. If ``None``, the complement of the + ``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If None, the complement of the ``train_idx`` - is used. If ``test_size`` is also None, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the + ``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is @@ -254,10 +254,10 @@ def global_reconstruction_distortion( For sample reconstruction of Y using X use input shape (features, samples). train_idx : numpy.ndarray, dtype=int, default=None Array of indices used for training. If ``None``, the complement of the - ``test_idx`` is used. If ``train_size`` is also None, 2-fold split is taken. + ``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None Array of indices used for testing. If ``None``, the complement of the - ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. + ``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. 
The default value scales the features such that the reconstruction measure on the training set is @@ -333,11 +333,11 @@ def pointwise_local_reconstruction_error( Number of neighbour points used to compute the local reconstruction weight for each sample/point. train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also ``None``, 2-fold split is taken. + Array of indices used for training. If ``None``, the complement of the + ``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None Array of indices used for testing. If ``None``, the complement of the - ``train_idx`` is used. If ``test_size`` is also None, 2-fold split is taken. + ``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is @@ -444,11 +444,11 @@ def local_reconstruction_error( Number of neighbour points used to compute the local reconstruction weight for each sample/point. train_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for training. If None, the complement of the ``test_idx`` - is used. If ``train_size`` is also ``None``, 2-fold split is taken. + Array of indices used for training. If ``None``, the complement of the + ``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken. test_idx : numpy.ndarray, dtype=int, default=None - Array of indices used for testing. If None, the complement of the ``train_idx`` - is used. If ``test_size`` is also None, 2-fold split is taken. + Array of indices used for testing. If ``None``, the complement of the + ``train_idx`` is used. 
If ``test_size`` is also ``None``, 2-fold split is taken. scaler : object implementing fit/transform, default=``StandardFlexibleScaler`` Scales X and Y before computing the reconstruction measure. The default value scales the features such that the reconstruction measure on the training set is From 9e64145f3a62bc9409d5c3ee75540075249233de Mon Sep 17 00:00:00 2001 From: Sofiia Chorna Date: Wed, 29 Oct 2025 12:54:17 +0100 Subject: [PATCH 6/8] Add test --- tests/test_metrics.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 9ccd57477..1a6467029 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -6,6 +6,7 @@ from skmatter.datasets import load_degenerate_CH4_manifold from skmatter.metrics import ( + check_global_reconstruction_measures_input, componentwise_prediction_rigidity, global_reconstruction_distortion, global_reconstruction_error, @@ -214,6 +215,17 @@ def test_local_reconstruction_error_test_idx(self): f"size {test_size}", ) + def test_source_target_len(self): + # tests that the source and target features have the same length + X = np.array([[1, 2, 3], [4, 5, 6]]) + Y = np.array([[1, 2, 3]]) + + with self.assertRaises(ValueError) as context: + check_global_reconstruction_measures_input(X, Y) + + expected_message = "First dimension of X (2) and Y (1) must match" + self.assertEqual(str(context.exception), expected_message) + class DistanceTests(unittest.TestCase): @classmethod From 0e81efad6a1f00a82c48a2746f24c30f8b0afff4 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna Date: Wed, 29 Oct 2025 12:59:23 +0100 Subject: [PATCH 7/8] Provide arguments in the test --- tests/test_metrics.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 1a6467029..1f8dad68f 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -220,8 +220,15 @@ def test_source_target_len(self): X = np.array([[1, 2, 3], [4, 5, 6]]) Y = 
np.array([[1, 2, 3]]) + train_idx = [0] + test_idx = [1] + scaler = None + estimator = None + with self.assertRaises(ValueError) as context: - check_global_reconstruction_measures_input(X, Y) + check_global_reconstruction_measures_input( + X, Y, train_idx, test_idx, scaler, estimator + ) expected_message = "First dimension of X (2) and Y (1) must match" self.assertEqual(str(context.exception), expected_message) From fc2014cee2af0333aec017e41dc678423e9b3202 Mon Sep 17 00:00:00 2001 From: Sofiia Chorna Date: Wed, 29 Oct 2025 13:40:10 +0100 Subject: [PATCH 8/8] Added test to check len(samples) and n_local_points error --- tests/test_metrics.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 1f8dad68f..cd384e3af 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -7,6 +7,7 @@ from skmatter.datasets import load_degenerate_CH4_manifold from skmatter.metrics import ( check_global_reconstruction_measures_input, + check_local_reconstruction_measures_input, componentwise_prediction_rigidity, global_reconstruction_distortion, global_reconstruction_error, @@ -233,6 +234,28 @@ def test_source_target_len(self): expected_message = "First dimension of X (2) and Y (1) must match" self.assertEqual(str(context.exception), expected_message) + def test_len_n_local_points(self): + # tests that source len is greater or equal than n_local_points in LFRE + X = np.array([[1, 2, 3], [4, 5, 6]]) + Y = np.array([[1, 1, 1], [2, 2, 2]]) + + n_local_points = 10 + train_idx = [0] + test_idx = [1] + scaler = None + estimator = None + + with self.assertRaises(ValueError) as context: + check_local_reconstruction_measures_input( + X, Y, n_local_points, train_idx, test_idx, scaler, estimator + ) + + expected_message = ( + f"X has {len(X)} samples but n_local_points={n_local_points}. 
" + "Must have at least n_local_points samples" + ) + self.assertEqual(str(context.exception), expected_message) + class DistanceTests(unittest.TestCase): @classmethod