Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ Added
Removed
-------

- **Breaking change:** The public methods ``cache_predictions`` and ``clear_cache`` on
:class:`~skore.EstimatorReport`, :class:`~skore.CrossValidationReport`, and
:class:`~skore.ComparisonReport` are removed. The library still uses
``_cache_predictions`` and ``_clear_cache`` internally; in application code, rely on
:meth:`~skore.EstimatorReport.get_predictions` and the metrics and inspection APIs,
which populate and reuse the in-memory store automatically.

Fixed
-----

Expand Down
62 changes: 12 additions & 50 deletions examples/model_evaluation/plot_estimator_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@

# %%
#
# Metrics computation with aggressive caching
# Metrics computation and repeated evaluation
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# At this point, we might be interested to have a first look at the statistical
Expand All @@ -116,13 +116,10 @@

# %%
#
# An interesting feature provided by the :class:`skore.EstimatorReport` is the
# caching mechanism. Indeed, when we have a large enough dataset, computing the
# predictions for a model is not cheap anymore. For instance, on our smallish dataset,
# it took a couple of seconds to compute the metrics. The report will cache the
# predictions and if we are interested in computing a metric again or an alternative
# metric that requires the same predictions, it will be faster. Let's check by
# requesting the same metrics report again.
# On large enough data, getting predictions is often the expensive step. The report
# keeps intermediate results in memory for the same session, so when we ask for the
# same :meth:`~skore.EstimatorReport.metrics.summarize` again, it can complete much
# faster. Let's request the same summary a second time.

start = time.time()
metric_report = report.metrics.summarize().frame()
Expand All @@ -147,22 +144,8 @@

# %%
#
# Whenever computing a metric, we check if the predictions are available in the cache
# and reload them if available. So for instance, let's compute the log loss.

start = time.time()
log_loss = report.metrics.log_loss()
end = time.time()
log_loss

# %%
print(f"Time taken to compute the log loss: {end - start:.2f} seconds")

# %%
#
# We can show that without initial cache, it would have taken more time to compute
# the log loss.
report.clear_cache()
# Another metric on the test set, such as log loss, can reuse the same underlying
# predictions if they were already required for a previous call.

start = time.time()
log_loss = report.metrics.log_loss()
Expand All @@ -181,10 +164,9 @@

# %%
#
# Be aware that we can also benefit from the caching mechanism with our own custom
# metrics. Skore only expects that we define our own metric function to take `y_true`
# and `y_pred` as the first two positional arguments. It can take any other arguments.
# Let's see an example.
# Custom metrics also go through the same path: they receive `y_true` and `y_pred`
# as the first two positional arguments, and the report supplies the predictions in
# the same way as it does for built-in metrics. The callable can take any other
# arguments. Let's see an example.


def operational_decision_cost(y_true, y_pred, amount):
Expand Down Expand Up @@ -259,10 +241,8 @@ def operational_decision_cost(y_true, y_pred, amount):

# %%
#
# Similarly to the metrics, we aggressively use the caching to avoid recomputing the
# predictions of the model. We also cache the plot display object and reuse it when the
# input parameters are the same as in the previous call. Let's demonstrate the kind of
# performance gain we can get.
# Similarly to the metrics, repeated calls for the same ROC display can be much
# faster in the same session once the underlying values have been computed.
start = time.time()
# the computation of the predictions was already triggered by a previous call
display = report.metrics.roc()
Expand All @@ -273,24 +253,6 @@ def operational_decision_cost(y_true, y_pred, amount):
# %%
print(f"Time taken to compute the ROC curve: {end - start:.2f} seconds")

# %%
#
# Now, let's clean the cache and check if we get a slowdown.
report.clear_cache()

# %%
start = time.time()
display = report.metrics.roc()
fig = display.plot()
end = time.time()
fig

# %%
print(f"Time taken to compute the ROC curve: {end - start:.2f} seconds")

# %%
# As expected, since we need to recompute the predictions, it takes more time.

# %%
# Visualizing the confusion matrix
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
100 changes: 38 additions & 62 deletions examples/technical_details/plot_cache_mechanism.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""
.. _example_cache_mechanism:

===============
Cache mechanism
===============
====================================
Fast repeated metrics and evaluation
====================================

This example shows how :class:`~skore.EstimatorReport` and
:class:`~skore.CrossValidationReport` use caching to speed up computations.
This example shows that :class:`~skore.EstimatorReport` and
:class:`~skore.CrossValidationReport` avoid redundant work when you compute metrics
or displays several times, so the second call is often much faster than the first.
"""

# %%
Expand Down Expand Up @@ -38,8 +39,8 @@
# Some categories are not well defined.

# %%
# Caching with :class:`~skore.EstimatorReport` and :class:`~skore.CrossValidationReport`
# ======================================================================================
# :class:`~skore.EstimatorReport` and repeated evaluation
# =======================================================
#
# We use `skrub` to create a simple predictive model that handles our dataset's
# challenges.
Expand All @@ -62,14 +63,11 @@
)

# %%
# Caching the predictions for fast metric computation
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# First and second calls to a metric
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# First, we focus on :class:`~skore.EstimatorReport`, as the same philosophy will
# apply to :class:`~skore.CrossValidationReport`.
#
# Let's explore how :class:`~skore.EstimatorReport` uses caching to speed up
# predictions. We start by training the model:
# We build an :class:`~skore.EstimatorReport` and time how long successive metric
# calls take.
from skore import EstimatorReport

report = EstimatorReport(
Expand Down Expand Up @@ -112,8 +110,7 @@
#
# Both approaches take similar time.
#
# Now, watch what happens when we compute the accuracy again with our skore estimator
# report:
# Now, we compute the accuracy again through the same report:
start = time.time()
result = report.metrics.accuracy()
end = time.time()
Expand All @@ -124,13 +121,13 @@

# %%
#
# The second calculation is instant! This happens because the report saves previous
# calculations in its cache. Let's look inside the cache:
report._cache
# The second calculation is much faster, because the report does not repeat the
# expensive ``predict`` work when the same information is still available for this
# session.

# %%
# The cache stores predictions by type and data source. This means that computing
# metrics that use the same type of predictions will be faster.
# A different metric that needs the same predictions
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Let's try the precision metric:
start = time.time()
result = report.metrics.precision()
Expand All @@ -141,23 +138,16 @@
print(f"Time taken: {end - start:.2f} seconds")

# %%
# We observe that it takes only a few milliseconds to compute the precision because we
# don't need to re-compute the predictions and only have to compute the precision
# metric itself.
# Since the predictions are the bottleneck in terms of computation time, we observe
# an interesting speedup.

# %%
# Caching all the possible predictions at once
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# We can pre-compute all predictions at once:
report.cache_predictions()
# It typically stays fast, because the same type of test-set predictions is reused
# where possible.

# %%
# Another data source
# ^^^^^^^^^^^^^^^^^^^
#
# Now, all possible predictions are stored. Any metric calculation will be much faster,
# even on different data (like the training set):
# The first time we ask for a training-set metric, the model must be run on the
# training set as well. Later calls on that data source also benefit from reuse.
start = time.time()
result = report.metrics.log_loss(data_source="train")
end = time.time()
Expand All @@ -167,10 +157,11 @@
print(f"Time taken: {end - start:.2f} seconds")

# %%
# Caching for plotting
# ^^^^^^^^^^^^^^^^^^^^
# Plots
# ^^^^^
#
# The cache also speeds up plots. Let's create a ROC curve:
# Displays (for example a ROC curve) also benefit: the first request builds the
# underlying arrays; a second request for the same display is quick.

start = time.time()
display = report.metrics.roc()
Expand All @@ -182,7 +173,6 @@

# %%
#
# The second plot is instant because it uses cached data:
start = time.time()
display = report.metrics.roc()
display.plot()
Expand All @@ -193,37 +183,27 @@

# %%
#
# We only use the cache to retrieve the `display` object and not directly the matplotlib
# figure. It means that we can still customize the cached plot before displaying it:
# We can still customize the display (for example, its style) and plot it again; the
# evaluation work behind the same metric does not need to be redone.
display.set_style(relplot_kwargs={"color": "tab:orange"})
_ = display.plot()

# %%
# Cross-validation: :class:`~skore.CrossValidationReport`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Be aware that we can clear the cache if we want to:
report.clear_cache()
report._cache

# %%
#
# It means that nothing is stored anymore in the cache.
#
# Caching with :class:`~skore.CrossValidationReport`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# :class:`~skore.CrossValidationReport` uses the same caching system for each split
# in cross-validation by leveraging the previous :class:`~skore.EstimatorReport`:
# A :class:`~skore.CrossValidationReport` uses one
# :class:`~skore.EstimatorReport` per split, so the same idea applies: the first
# heavy summary of metrics walks every fold; a second run reuses work where possible.
from skore import CrossValidationReport

report = CrossValidationReport(model, X=df, y=y, splitter=5, n_jobs=4)
report.help()

# %%
#
# Since a :class:`~skore.CrossValidationReport` uses many
# :class:`~skore.EstimatorReport`, we will observe the same behaviour as we previously
# exposed.
# The first call will be slow because it computes the predictions for each split.
# The first call to a full summary of metrics can take a while because each fold
# is evaluated.
start = time.time()
result = report.metrics.summarize().frame()
end = time.time()
Expand All @@ -234,15 +214,11 @@

# %%
#
# But the subsequent calls are fast because the predictions are cached.
# The second call is typically much faster.
start = time.time()
result = report.metrics.summarize().frame()
end = time.time()
result

# %%
print(f"Time taken: {end - start:.2f} seconds")

# %%
#
# Hence, we observe the same type of behaviour as we previously exposed.
28 changes: 7 additions & 21 deletions examples/use_cases/plot_employee_salaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,17 +110,11 @@
# %%
# A report provides a collection of useful information. For instance, it allows us to
# compute on demand the predictions of the model and some performance metrics.
#
# Let's cache the predictions of the cross-validated models once and for all.

# %%
hgbt_model_report.cache_predictions()
# The first time we request a summary of metrics, the report performs the per-fold
# work it needs; later calls in the same session can reuse much of that work.

# %%
# Now that the predictions are cached, any request to compute a metric will be
# performed using the cached predictions and will thus be fast.
#
# We can now have a look at the performance of the model with some standard metrics.
# We can have a look at the performance of the model with some standard metrics.

# %%
hgbt_model_report.metrics.summarize().frame()
Expand Down Expand Up @@ -254,17 +248,9 @@ def periodic_spline_transformer(period, n_splines=None, degree=3):
# We observe that the cross-validation report has detected that we have a regression
# task at hand and thus provides us with some metrics and plots that make sense with
# regard to our specific problem.
#
# To accelerate any future computation (e.g. of a metric), we cache the predictions of
# our model once and for all.
# Note that we do not necessarily need to cache the predictions as the report will
# compute them on the fly (if not cached) and cache them for us.

# %%
linear_model_report.cache_predictions()

# %%
# We can now have a look at the performance of the model with some standard metrics.
# We can have a look at the performance of the model with some standard metrics.

# %%
linear_model_report.metrics.summarize().frame(favorability=True)
Expand All @@ -285,9 +271,9 @@ def periodic_spline_transformer(period, n_splines=None, degree=3):
# %%
# In addition, if we forgot to compute a specific metric
# (e.g. :func:`~sklearn.metrics.mean_absolute_error`),
# we can easily add it to the report, without re-training the model and even
# without re-computing the predictions since they are cached internally in the report.
# This allows us to save some potentially huge computation time.
# we can easily add it to the report, without re-training the model. The
# comparison report reuses the predictions already stored by the underlying reports
# where possible, so we avoid redundant prediction work in the same session.

# %%
comparator.metrics.add(metric="neg_mean_absolute_error", name="MAE")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def content_to_upload(self) -> Generator[bytes, None, None]:
reports_with_cache = [
(report, report._cache) for report in reports if hasattr(report, "_cache")
]
self.report.clear_cache()
self.report._clear_cache()

try:
with BytesIO() as stream:
Expand Down
8 changes: 4 additions & 4 deletions skore-hub-project/src/skore_hub_project/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class EstimatorReport(Protocol):
"""Protocol equivalent to ``skore.EstimatorReport``."""

_hash: int
cache_predictions: Any
clear_cache: Any
_cache_predictions: Any
_clear_cache: Any
_cache: Any
metrics: Any
data: Any
Expand All @@ -49,8 +49,8 @@ class CrossValidationReport(Protocol):
"""Protocol equivalent to ``skore.CrossValidationReport``."""

_hash: int
cache_predictions: Any
clear_cache: Any
_cache_predictions: Any
_clear_cache: Any
metrics: Any
data: Any
estimator_reports_: Any
Expand Down
Loading
Loading