probabl-ai
diff --git a/‎skore-mlflow-project/src/skore_mlflow_project/protocol.py‎
Lines changed: 2 additions & 2 deletions b/‎skore-mlflow-project/src/skore_mlflow_project/protocol.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎skore-mlflow-project/src/skore_mlflow_project/reports.py‎
Lines changed: 1 addition & 1 deletion b/‎skore-mlflow-project/src/skore_mlflow_project/reports.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎skore/src/skore/_plugins/hub/artifact/media/model.py‎
Lines changed: 3 additions & 1 deletion b/‎skore/src/skore/_plugins/hub/artifact/media/model.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎skore/src/skore/_sklearn/_checks/model_checks.py‎
Lines changed: 1 addition & 1 deletion b/‎skore/src/skore/_sklearn/_checks/model_checks.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎skore/src/skore/_sklearn/_comparison/metrics_accessor.py‎
Lines changed: 2 additions & 2 deletions b/‎skore/src/skore/_sklearn/_comparison/metrics_accessor.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎skore/src/skore/_sklearn/_comparison/report.py‎
Lines changed: 1 addition & 1 deletion b/‎skore/src/skore/_sklearn/_comparison/report.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎skore/src/skore/_sklearn/_cross_validation/report.py‎
Lines changed: 31 additions & 23 deletions b/‎skore/src/skore/_sklearn/_cross_validation/report.py‎
Lines changed: 31 additions & 23 deletions
diff --git a/‎skore/src/skore/_sklearn/_estimator/metrics_accessor.py‎
Lines changed: 2 additions & 2 deletions b/‎skore/src/skore/_sklearn/_estimator/metrics_accessor.py‎
Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@ class EstimatorReport(Protocol):
     """Protocol equivalent to ``skore.EstimatorReport``."""
 
     ml_task: str
-    estimator: BaseEstimator
+    original_estimator: BaseEstimator
     estimator_: BaseEstimator
     estimator_name_: str
     X_train: DatasetLike | None
@@ -33,7 +33,7 @@ class CrossValidationReport(Protocol):
     """Protocol equivalent to ``skore.CrossValidationReport``."""
 
     ml_task: str
-    estimator: BaseEstimator
+    original_estimator: BaseEstimator
     estimator_: BaseEstimator
     estimator_name_: str
     estimator_reports_: list[EstimatorReport]
 
@@ -199,7 +199,7 @@ def iter_cv(report: CrossValidationReport) -> Generator[NestedLogItem, None, Non
     """Yield loggable objects for a cross-validation report."""
     yield from iter_cv_metrics(report)
 
-    estimator = clone(report.estimator).fit(report.X, report.y)
+    estimator = clone(report.original_estimator).fit(report.X, report.y)
     yield Params(estimator.get_params())
     yield Model(estimator, _sample_input_example(report.X))
 
 
@@ -13,8 +13,10 @@ class EstimatorHtmlRepr(Media[Report]):  # noqa: D101
     def content_to_upload(self) -> str:  # noqa: D102
         import sklearn.utils
 
+        # FIXME: Unclear whether we want the repr of estimator_
+        # or of original_estimator
         estimator_html_repr: str = sklearn.utils.estimator_html_repr(
-            self.report.estimator
+            self.report.estimator_
         )
 
         return estimator_html_repr
@@ -290,7 +290,7 @@ def check_function(self, report: _BaseReport) -> str | None:
             isinstance(report, EstimatorReport)
             and report.X_train is not None
             and report.X_test is not None
-            and hasattr(report.estimator, "coef_")
+            and hasattr(report.estimator_, "coef_")
         ):
             raise CheckNotApplicable()
 
 
@@ -1182,10 +1182,10 @@ def confusion_matrix(
         """
         do_thresholds = True
         if not all(
-            hasattr(report._estimator, "predict_proba")
+            hasattr(report.learner_, "predict_proba")
             for report in self._parent.reports_.values()
         ) and not all(
-            hasattr(report._estimator, "decision_function")
+            hasattr(report.learner_, "decision_function")
             for report in self._parent.reports_.values()
         ):
             warnings.warn(
 
@@ -523,7 +523,7 @@ def create_estimator_report(
             X_train, y_train = estimator_report.X_train, estimator_report.y_train
 
         return EstimatorReport(
-            estimator_report._raw_estimator,
+            estimator_report._original_estimator,
             fit=True,
             X_train=X_train,
             y_train=y_train,
 
@@ -9,7 +9,7 @@
 import skrub
 from joblib import Parallel
 from numpy.typing import ArrayLike
-from sklearn.base import BaseEstimator, clone, is_classifier
+from sklearn.base import clone, is_classifier
 from sklearn.model_selection import check_cv
 from sklearn.pipeline import Pipeline
 
@@ -66,7 +66,7 @@ def _check_estimator_and_data(
     X: ArrayLike | None,
     y: ArrayLike | None,
     data: dict | None,
-) -> tuple[bool, EstimatorLike, dict]:
+) -> tuple[bool, skrub.SkrubLearner, dict]:
     if is_skrub_learner(estimator):
         initialized_with_data_op = True
         if X is not None or y is not None:
@@ -153,7 +153,13 @@ class CrossValidationReport(_BaseReport, DirNamesMixin):
     Attributes
     ----------
     estimator_ : estimator object
-        The cloned or copied estimator.
+        The fitted estimator.
+
+    original_estimator : estimator object
+        The estimator that was given as input.
+
+    learner_ : skrub.SkrubLearner
+        The estimator wrapped in a skrub Learner.
 
     estimator_name_ : str
         The name of the estimator.
@@ -204,7 +210,7 @@ def __init__(
         n_jobs: int | None = None,
     ) -> None:
         super().__init__()
-        self._raw_estimator = estimator
+        self._original_estimator = estimator
         if isinstance(estimator, skrub.DataOp):
             if data is None:
                 data = estimator.skb.get_data()
@@ -214,7 +220,7 @@ def __init__(
                 "Clustering models are not supported yet. Please use a"
                 " classification or regression model instead."
             )
-        self._initialized_with_data_op, self._estimator, self._data = (
+        self._initialized_with_data_op, self.learner_, self._data = (
             _check_estimator_and_data(clone(estimator), X, y, data)
         )
         self._pos_label = pos_label
@@ -244,12 +250,12 @@ def _fit_estimator_reports(self) -> list[EstimatorReport]:
                 track(
                     parallel(
                         delayed(EstimatorReport)(
-                            clone(self._estimator),
+                            clone(self.learner_),
                             train_data=split["train"],
                             test_data=split["test"],
                             pos_label=self._pos_label,
                         )
-                        for split in self._estimator.data_op.skb.iter_cv_splits(
+                        for split in self.learner_.data_op.skb.iter_cv_splits(
                             environment=self._data, cv=self.split_indices
                         )
                     ),
@@ -264,7 +270,7 @@ def _fit_estimator_reports(self) -> list[EstimatorReport]:
                 track(
                     parallel(
                         delayed(_generate_estimator_report)(
-                            clone(self._raw_estimator),
+                            clone(self._original_estimator),
                             self.X,
                             self.y,
                             self.pos_label,
@@ -296,10 +302,10 @@ def get_state(self) -> dict[str, Any]:
             "version": _STATE_VERSION,
             "metadata": self._metadata,
             "initialized_with_data_op": self._initialized_with_data_op,
-            "raw_estimator": self._raw_estimator,
+            "original_estimator": self._original_estimator,
             "ml_task": self.ml_task,
             "pos_label": self.pos_label,
-            "estimator": self._estimator,
+            "estimator": self.learner_,
             "data": self._data,
             "split_indices": self._split_indices,
             "estimator_reports": sub_states,
@@ -323,8 +329,8 @@ def from_state(cls, state: dict[str, Any]) -> CrossValidationReport:
         report._initialized_with_data_op = state["initialized_with_data_op"]
         report._ml_task = state["ml_task"]
         report._pos_label = state["pos_label"]
-        report._estimator = state["estimator"]
-        report._raw_estimator = state["raw_estimator"]
+        report.learner_ = state["estimator"]
+        report._original_estimator = state["original_estimator"]
         report._data = state["data"]
         report._split_indices = state["split_indices"]
         # TODO? Include splitter in state?
@@ -333,7 +339,7 @@ def from_state(cls, state: dict[str, Any]) -> CrossValidationReport:
 
         report.estimator_reports_ = []
         if report._initialized_with_data_op:
-            split_data_iterator = report._estimator.data_op.skb.iter_cv_splits(
+            split_data_iterator = report.learner_.data_op.skb.iter_cv_splits(
                 environment=report._data,
                 cv=report._split_indices,
             )
@@ -535,14 +541,14 @@ def create_estimator_report(
         """
         if self._initialized_with_data_op:
             report = EstimatorReport(
-                self._estimator,
+                self.learner_,
                 train_data=self._data,
                 test_data=test_data,
                 pos_label=self._pos_label,
             )
         else:
             report = EstimatorReport(
-                self._raw_estimator,
+                self._original_estimator,
                 X_train=self.X,
                 y_train=self.y,
                 X_test=X_test,
@@ -586,21 +592,23 @@ def ml_task(self) -> str:
         return self._ml_task
 
     @property
-    def estimator(self) -> BaseEstimator:
-        return self.estimator_
+    def original_estimator(self) -> EstimatorLike:
+        """The estimator that was given as input."""
+        return self._original_estimator
 
     @property
-    def estimator_(self) -> BaseEstimator:
+    def estimator_(self) -> EstimatorLike:
+        """The report's fitted estimator."""
         if self._initialized_with_data_op:
-            return self._estimator
-        return to_estimator(self._estimator)
+            return self.learner_
+        return to_estimator(self.learner_)
 
     @property
     def estimator_name_(self) -> str:
-        if isinstance(self._raw_estimator, Pipeline):
-            name = self._raw_estimator[-1].__class__.__name__
+        if isinstance(self._original_estimator, Pipeline):
+            name = self._original_estimator[-1].__class__.__name__
         else:
-            name = self._raw_estimator.__class__.__name__
+            name = self._original_estimator.__class__.__name__
         return name
 
     @property
 
@@ -1163,8 +1163,8 @@ def confusion_matrix(
                 "data_source='both' is not supported for confusion_matrix."
             )
 
-        if hasattr(self._parent._estimator, "predict_proba") or hasattr(
-            self._parent._estimator, "decision_function"
+        if hasattr(self._parent.learner_, "predict_proba") or hasattr(
+            self._parent.learner_, "decision_function"
         ):
             y_scores = self._parent._get_predictions(
                 data_source=data_source,