probabl-ai · glemaitre · Apr 26, 2026
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -48,6 +48,13 @@ Added
 Removed
 -------
 
+- **Breaking change:** The public methods ``cache_predictions`` and ``clear_cache`` on
+  :class:`~skore.EstimatorReport`, :class:`~skore.CrossValidationReport`, and
+  :class:`~skore.ComparisonReport` are removed. The library still uses
+  ``_cache_predictions`` and ``_clear_cache`` internally; in application code, rely on
+  :meth:`~skore.EstimatorReport.get_predictions` and the metrics and inspection APIs,
+  which populate and reuse the in-memory store automatically.
+
 Fixed
 -----
 

diff --git a/examples/model_evaluation/plot_estimator_report.py b/examples/model_evaluation/plot_estimator_report.py
@@ -96,7 +96,7 @@
 
 # %%
 #
-# Metrics computation with aggressive caching
+# Metrics computation and repeated evaluation
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 # At this point, we might be interested to have a first look at the statistical
@@ -116,13 +116,10 @@
 
 # %%
 #
-# An interesting feature provided by the :class:`skore.EstimatorReport` is the
-# the caching mechanism. Indeed, when we have a large enough dataset, computing the
-# predictions for a model is not cheap anymore. For instance, on our smallish dataset,
-# it took a couple of seconds to compute the metrics. The report will cache the
-# predictions and if we are interested in computing a metric again or an alternative
-# metric that requires the same predictions, it will be faster. Let's check by
-# requesting the same metrics report again.
+# On large enough data, getting predictions is often the expensive step. The report
+# keeps intermediate results in memory for the same session, so when we ask for the
+# same :meth:`~skore.EstimatorReport.metrics.summarize` again, it can complete much
+# faster. Let's request the same summary a second time.
 
 start = time.time()
 metric_report = report.metrics.summarize().frame()
@@ -147,22 +144,8 @@
 
 # %%
 #
-# Whenever computing a metric, we check if the predictions are available in the cache
-# and reload them if available. So for instance, let's compute the log loss.
-
-start = time.time()
-log_loss = report.metrics.log_loss()
-end = time.time()
-log_loss
-
-# %%
-print(f"Time taken to compute the log loss: {end - start:.2f} seconds")
-
-# %%
-#
-# We can show that without initial cache, it would have taken more time to compute
-# the log loss.
-report.clear_cache()
+# Another metric on the test set, such as log loss, can reuse the same underlying
+# predictions if they were already computed for a previous call.
 
 start = time.time()
 log_loss = report.metrics.log_loss()
@@ -181,10 +164,9 @@
 
 # %%
 #
-# Be aware that we can also benefit from the caching mechanism with our own custom
-# metrics. Skore only expects that we define our own metric function to take `y_true`
-# and `y_pred` as the first two positional arguments. It can take any other arguments.
-# Let's see an example.
+# Custom metrics also go through the same path: they receive `y_true` and `y_pred`
+# as the first two arguments, and the report supplies predictions consistently with
+# built-in metrics. The callable can take any other arguments. Let's see an example.
 
 
 def operational_decision_cost(y_true, y_pred, amount):
@@ -259,10 +241,8 @@ def operational_decision_cost(y_true, y_pred, amount):
 
 # %%
 #
-# Similarly to the metrics, we aggressively use the caching to avoid recomputing the
-# predictions of the model. We also cache the plot display object by detection if the
-# input parameters are the same as the previous call. Let's demonstrate the kind of
-# performance gain we can get.
+# Similarly to the metrics, repeated calls for the same ROC display can be much
+# faster in the same session once the underlying values have been computed.
 start = time.time()
 # we already trigger the computation of the predictions in a previous call
 display = report.metrics.roc()
@@ -273,24 +253,6 @@ def operational_decision_cost(y_true, y_pred, amount):
 # %%
 print(f"Time taken to compute the ROC curve: {end - start:.2f} seconds")
 
-# %%
-#
-# Now, let's clean the cache and check if we get a slowdown.
-report.clear_cache()
-
-# %%
-start = time.time()
-display = report.metrics.roc()
-fig = display.plot()
-end = time.time()
-fig
-
-# %%
-print(f"Time taken to compute the ROC curve: {end - start:.2f} seconds")
-
-# %%
-# As expected, since we need to recompute the predictions, it takes more time.
-
 # %%
 # Visualizing the confusion matrix
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/examples/technical_details/plot_cache_mechanism.py b/examples/technical_details/plot_cache_mechanism.py
@@ -1,12 +1,13 @@
 """
 .. _example_cache_mechanism:
 
-===============
-Cache mechanism
-===============
+====================================
+Fast repeated metrics and evaluation
+====================================
 
-This example shows how :class:`~skore.EstimatorReport` and
-:class:`~skore.CrossValidationReport` use caching to speed up computations.
+This example shows that :class:`~skore.EstimatorReport` and
+:class:`~skore.CrossValidationReport` avoid redundant work when you compute metrics
+or displays several times, so the second call is often much faster than the first.
 """
 
 # %%
@@ -38,8 +39,8 @@
 # Some categories are not well defined.
 
 # %%
-# Caching with :class:`~skore.EstimatorReport` and :class:`~skore.CrossValidationReport`
-# ======================================================================================
+# :class:`~skore.EstimatorReport` and repeated evaluation
+# =======================================================
 #
 # We use `skrub` to create a simple predictive model that handles our dataset's
 # challenges.
@@ -62,14 +63,11 @@
 )
 
 # %%
-# Caching the predictions for fast metric computation
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# First and second calls to a metric
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
-# First, we focus on :class:`~skore.EstimatorReport`, as the same philosophy will
-# apply to :class:`~skore.CrossValidationReport`.
-#
-# Let's explore how :class:`~skore.EstimatorReport` uses caching to speed up
-# predictions. We start by training the model:
+# We build an :class:`~skore.EstimatorReport` and time how long successive metric
+# calls take.
 from skore import EstimatorReport
 
 report = EstimatorReport(
@@ -112,8 +110,7 @@
 #
 # Both approaches take similar time.
 #
-# Now, watch what happens when we compute the accuracy again with our skore estimator
-# report:
+# Now, we compute the accuracy again through the same report:
 start = time.time()
 result = report.metrics.accuracy()
 end = time.time()
@@ -124,13 +121,13 @@
 
 # %%
 #
-# The second calculation is instant! This happens because the report saves previous
-# calculations in its cache. Let's look inside the cache:
-report._cache
+# The second calculation is much faster because the report reuses the results
+# it computed during the first call instead of running the expensive ``predict``
+# step again.
 
 # %%
-# The cache stores predictions by type and data source. This means that computing
-# metrics that use the same type of predictions will be faster.
+# A different metric that needs the same predictions
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 # Let's try the precision metric:
 start = time.time()
 result = report.metrics.precision()
@@ -141,23 +138,16 @@
 print(f"Time taken: {end - start:.2f} seconds")
 
 # %%
-# We observe that it takes only a few milliseconds to compute the precision because we
-# don't need to re-compute the predictions and only have to compute the precision
-# metric itself.
-# Since the predictions are the bottleneck in terms of computation time, we observe
-# an interesting speedup.
-
-# %%
-# Caching all the possible predictions at once
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
-# We can pre-compute all predictions at once:
-report.cache_predictions()
+# Computing the precision is typically fast too, because it reuses the test-set
+# predictions already computed for the accuracy.
 
 # %%
+# Another data source
+# ^^^^^^^^^^^^^^^^^^^
 #
-# Now, all possible predictions are stored. Any metric calculation will be much faster,
-# even on different data (like the training set):
+# The first time we ask for a training-set metric, the model must be run on the
+# training set as well. Later calls on that data source also benefit from reuse.
 start = time.time()
 result = report.metrics.log_loss(data_source="train")
 end = time.time()
@@ -167,10 +157,11 @@
 print(f"Time taken: {end - start:.2f} seconds")
 
 # %%
-# Caching for plotting
-# ^^^^^^^^^^^^^^^^^^^^
+# Plots
+# ^^^^^
 #
-# The cache also speeds up plots. Let's create a ROC curve:
+# Displays (for example a ROC curve) also benefit: the first request builds the
+# underlying arrays; a second request for the same display is quick.
 
 start = time.time()
 display = report.metrics.roc()
@@ -182,7 +173,6 @@
 
 # %%
 #
-# The second plot is instant because it uses cached data:
 start = time.time()
 display = report.metrics.roc()
 display.plot()
@@ -193,37 +183,27 @@
 
 # %%
 #
-# We only use the cache to retrieve the `display` object and not directly the matplotlib
-# figure. It means that we can still customize the cached plot before displaying it:
+# We can still customize the display (for example, its style) and plot it again;
+# the evaluation work behind the display does not need to be redone in full.
 display.set_style(relplot_kwargs={"color": "tab:orange"})
 _ = display.plot()
 
 # %%
+# Cross-validation: :class:`~skore.CrossValidationReport`
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
-# Be aware that we can clear the cache if we want to:
-report.clear_cache()
-report._cache
-
-# %%
-#
-# It means that nothing is stored anymore in the cache.
-#
-# Caching with :class:`~skore.CrossValidationReport`
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#
-# :class:`~skore.CrossValidationReport` uses the same caching system for each split
-# in cross-validation by leveraging the previous :class:`~skore.EstimatorReport`:
+# A :class:`~skore.CrossValidationReport` uses one
+# :class:`~skore.EstimatorReport` per split, so the same idea applies: the first
+# heavy summary of metrics walks every fold; a second run reuses work where possible.
 from skore import CrossValidationReport
 
 report = CrossValidationReport(model, X=df, y=y, splitter=5, n_jobs=4)
 report.help()
 
 # %%
 #
-# Since a :class:`~skore.CrossValidationReport` uses many
-# :class:`~skore.EstimatorReport`, we will observe the same behaviour as we previously
-# exposed.
-# The first call will be slow because it computes the predictions for each split.
+# The first call to a full summary of metrics can take a while because each fold
+# is evaluated.
 start = time.time()
 result = report.metrics.summarize().frame()
 end = time.time()
@@ -234,15 +214,11 @@
 
 # %%
 #
-# But the subsequent calls are fast because the predictions are cached.
+# The second call is typically much faster.
 start = time.time()
 result = report.metrics.summarize().frame()
 end = time.time()
 result
 
 # %%
 print(f"Time taken: {end - start:.2f} seconds")
-
-# %%
-#
-# Hence, we observe the same type of behaviour as we previously exposed.
diff --git a/examples/use_cases/plot_employee_salaries.py b/examples/use_cases/plot_employee_salaries.py
@@ -110,17 +110,11 @@
 # %%
 # A report provides a collection of useful information. For instance, it allows to
 # compute on demand the predictions of the model and some performance metrics.
-#
-# Let's cache the predictions of the cross-validated models once and for all.
-
-# %%
-hgbt_model_report.cache_predictions()
+# The first time we request a summary of metrics, the report performs the per-fold
+# work it needs; later calls in the same session can reuse much of that work.
 
 # %%
-# Now that the predictions are cached, any request to compute a metric will be
-# performed using the cached predictions and will thus be fast.
-#
-# We can now have a look at the performance of the model with some standard metrics.
+# We can have a look at the performance of the model with some standard metrics.
 
 # %%
 hgbt_model_report.metrics.summarize().frame()
@@ -254,17 +248,9 @@ def periodic_spline_transformer(period, n_splines=None, degree=3):
 # We observe that the cross-validation report has detected that we have a regression
 # task at hand and thus provides us with some metrics and plots that make sense with
 # regards to our specific problem at hand.
-#
-# To accelerate any future computation (e.g. of a metric), we cache the predictions of
-# our model once and for all.
-# Note that we do not necessarily need to cache the predictions as the report will
-# compute them on the fly (if not cached) and cache them for us.
-
-# %%
-linear_model_report.cache_predictions()
 
 # %%
-# We can now have a look at the performance of the model with some standard metrics.
+# We can have a look at the performance of the model with some standard metrics.
 
 # %%
 linear_model_report.metrics.summarize().frame(favorability=True)
@@ -285,9 +271,9 @@ def periodic_spline_transformer(period, n_splines=None, degree=3):
 # %%
 # In addition, if we forgot to compute a specific metric
 # (e.g. :func:`~sklearn.metrics.mean_absolute_error`),
-# we can easily add it to the report, without re-training the model and even
-# without re-computing the predictions since they are cached internally in the report.
-# This allows us to save some potentially huge computation time.
+# we can easily add it to the report without re-training the model. The
+# comparison report reuses results already computed by the underlying reports
+# where possible, so we avoid redundant prediction work in the same session.
 
 # %%
 comparator.metrics.add(metric="neg_mean_absolute_error", name="MAE")

diff --git a/skore-hub-project/src/skore_hub_project/artifact/pickle/pickle.py b/skore-hub-project/src/skore_hub_project/artifact/pickle/pickle.py
@@ -51,7 +51,7 @@ def content_to_upload(self) -> Generator[bytes, None, None]:
         reports_with_cache = [
             (report, report._cache) for report in reports if hasattr(report, "_cache")
         ]
-        self.report.clear_cache()
+        self.report._clear_cache()
 
         try:
             with BytesIO() as stream:

diff --git a/skore-hub-project/src/skore_hub_project/protocol.py b/skore-hub-project/src/skore_hub_project/protocol.py
@@ -26,8 +26,8 @@ class EstimatorReport(Protocol):
     """Protocol equivalent to ``skore.EstimatorReport``."""
 
     _hash: int
-    cache_predictions: Any
-    clear_cache: Any
+    _cache_predictions: Any
+    _clear_cache: Any
     _cache: Any
     metrics: Any
     data: Any
@@ -49,8 +49,8 @@ class CrossValidationReport(Protocol):
     """Protocol equivalent to ``skore.CrossValidationReport``."""
 
     _hash: int
-    cache_predictions: Any
-    clear_cache: Any
+    _cache_predictions: Any
+    _clear_cache: Any
     metrics: Any
     data: Any
     estimator_reports_: Any