pymc-labs · drbenvincent · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/causalpy/checks/convex_hull.py b/causalpy/checks/convex_hull.py
@@ -50,8 +50,8 @@ def run(
     ) -> CheckResult:
         """Run the convex hull violation check on pre-treatment data."""
         sc = experiment
-        datapre_control = sc.datapre_control  # type: ignore[attr-defined]
-        datapre_treated = sc.datapre_treated  # type: ignore[attr-defined]
+        datapre_control = sc.pre_design["control"]  # type: ignore[attr-defined]
+        datapre_treated = sc.pre_design["treated"]  # type: ignore[attr-defined]
 
         all_results = []
         total_violations = 0

diff --git a/causalpy/experiments/base.py b/causalpy/experiments/base.py
@@ -18,13 +18,16 @@
 from __future__ import annotations
 
 import contextlib
+import warnings
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Literal
 
 import arviz as az
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
+import xarray as xr
 from sklearn.base import RegressorMixin
 
 from causalpy.maketables_adapters import get_maketables_adapter
@@ -114,6 +117,67 @@ class BaseExperiment(ABC):
 
     _default_model_class: type[PyMCModel] | None = None
 
+    _deprecated_design_aliases: dict[str, tuple[str, str]] = {}
+    """Mapping of ``old_attr -> (dataset_attr, key)`` for deprecated design
+    matrix accessors.  Subclasses populate this so that
+    ``__getattr__`` can forward accesses with a deprecation warning."""
+
+    def __getattr__(self, name: str) -> Any:
+        aliases = type(self)._deprecated_design_aliases
+        if name in aliases:
+            dataset_attr, key = aliases[name]
+            warnings.warn(
+                f"{name} is deprecated, use {dataset_attr}['{key}']",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            return getattr(self, dataset_attr)[key]
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{name}'"
+        )
+
+    @staticmethod
+    def _build_design_dataset(
+        X_raw: np.ndarray,
+        y_raw: np.ndarray,
+        *,
+        obs_ind: np.ndarray | pd.Index,
+        coeffs: list[str],
+        treated_units: list[str] | None = None,
+    ) -> xr.Dataset:
+        """Build a standard ``xr.Dataset`` from raw design matrices.
+
+        Parameters
+        ----------
+        X_raw : np.ndarray
+            Predictor matrix, shape ``(n_obs, n_coeffs)``.
+        y_raw : np.ndarray
+            Outcome matrix, shape ``(n_obs, n_units)``.
+        obs_ind : array-like
+            Observation index coordinates.
+        coeffs : list[str]
+            Coefficient / column names for ``X_raw``.
+        treated_units : list[str], optional
+            Names for the treated-unit dimension of ``y_raw``.
+            Defaults to ``["unit_0"]``.
+        """
+        if treated_units is None:
+            treated_units = ["unit_0"]
+        return xr.Dataset(
+            {
+                "X": xr.DataArray(
+                    X_raw,
+                    dims=["obs_ind", "coeffs"],
+                    coords={"obs_ind": obs_ind, "coeffs": coeffs},
+                ),
+                "y": xr.DataArray(
+                    y_raw,
+                    dims=["obs_ind", "treated_units"],
+                    coords={"obs_ind": obs_ind, "treated_units": treated_units},
+                ),
+            }
+        )
+
     def __init__(self, model: PyMCModel | RegressorMixin | None = None) -> None:
         # Ensure we've made any provided Scikit Learn model (as identified as being type
         # RegressorMixin) compatible with CausalPy by appending our custom methods.

diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py
@@ -98,6 +98,7 @@ class DifferenceInDifferences(BaseExperiment):
     supports_ols = True
     supports_bayes = True
     _default_model_class = LinearRegression
+    _deprecated_design_aliases = {"X": ("design", "X"), "y": ("design", "y")}
 
     def __init__(
         self,
@@ -130,42 +131,36 @@ def _build_design_matrices(self) -> None:
         self._y_design_info = y.design_info
         self._x_design_info = X.design_info
         self.labels = X.design_info.column_names
-        self.y, self.X = np.asarray(y), np.asarray(X)
+        self._y_raw, self._X_raw = np.asarray(y), np.asarray(X)
         self.outcome_variable_name = y.design_info.column_names[0]
 
     def _prepare_data(self) -> None:
-        """Convert design matrices to xarray DataArrays."""
-        self.X = xr.DataArray(
-            self.X,
-            dims=["obs_ind", "coeffs"],
-            coords={
-                "obs_ind": np.arange(self.X.shape[0]),
-                "coeffs": self.labels,
-            },
-        )
-        self.y = xr.DataArray(
-            self.y,
-            dims=["obs_ind", "treated_units"],
-            coords={"obs_ind": np.arange(self.y.shape[0]), "treated_units": ["unit_0"]},
+        """Bundle design matrices into an ``xr.Dataset``."""
+        n = self._X_raw.shape[0]
+        self.design = self._build_design_dataset(
+            self._X_raw,
+            self._y_raw,
+            obs_ind=np.arange(n),
+            coeffs=self.labels,
         )
+        del self._X_raw, self._y_raw
 
     def algorithm(self) -> None:
         """Run the experiment algorithm: fit model, predict, and calculate causal impact."""
-        # fit model
+        X = self.design["X"]
+        y = self.design["y"]
+
         if isinstance(self.model, PyMCModel):
             COORDS = {
                 "coeffs": self.labels,
-                "obs_ind": np.arange(self.X.shape[0]),
+                "obs_ind": np.arange(X.shape[0]),
                 "treated_units": ["unit_0"],
             }
-            self.model.fit(X=self.X, y=self.y, coords=COORDS)
+            self.model.fit(X=X, y=y, coords=COORDS)
         elif isinstance(self.model, RegressorMixin):
-            # Ensure the intercept is part of the coefficients array rather than
-            # a separate intercept_ attribute.  See #664 / PR #693 for
-            # centralising this in BaseExperiment.
             if hasattr(self.model, "fit_intercept"):
                 self.model.fit_intercept = False
-            self.model.fit(X=self.X, y=self.y)
+            self.model.fit(X=X, y=y)
         else:
             raise ValueError("Model type not recognized")
 

diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py
@@ -128,6 +128,12 @@ class InterruptedTimeSeries(BaseExperiment):
     supports_ols = True
     supports_bayes = True
     _default_model_class = LinearRegression
+    _deprecated_design_aliases = {
+        "pre_X": ("pre_design", "X"),
+        "pre_y": ("pre_design", "y"),
+        "post_X": ("post_design", "X"),
+        "post_y": ("post_design", "y"),
+    }
 
     def __init__(
         self,
@@ -139,9 +145,8 @@ def __init__(
         **kwargs: Any,
     ) -> None:
         super().__init__(model=model)
-        self.pre_y: xr.DataArray
-        self.post_y: xr.DataArray
-        # rename the index to "obs_ind"
+        self.pre_design: xr.Dataset
+        self.post_design: xr.Dataset
         data.index.name = "obs_ind"
         self.data = data
         self.input_validation(data, treatment_time, treatment_end_time)
@@ -155,96 +160,76 @@ def __init__(
 
     def _build_design_matrices(self) -> None:
         """Build design matrices for pre and post intervention periods using patsy."""
-        # set things up with pre-intervention data
         y, X = dmatrices(self.formula, self.datapre)
         self.outcome_variable_name = y.design_info.column_names[0]
         self._y_design_info = y.design_info
         self._x_design_info = X.design_info
         self.labels = X.design_info.column_names
-        self.pre_y, self.pre_X = np.asarray(y), np.asarray(X)
-        # process post-intervention data
+        self._pre_y_raw, self._pre_X_raw = np.asarray(y), np.asarray(X)
         (new_y, new_x) = build_design_matrices(
             [self._y_design_info, self._x_design_info], self.datapost
         )
-        self.post_X = np.asarray(new_x)
-        self.post_y = np.asarray(new_y)
+        self._post_X_raw = np.asarray(new_x)
+        self._post_y_raw = np.asarray(new_y)
 
     def _prepare_data(self) -> None:
-        """Convert design matrices to xarray DataArrays for pre and post periods."""
-        self.pre_X = xr.DataArray(
-            self.pre_X,
-            dims=["obs_ind", "coeffs"],
-            coords={
-                "obs_ind": self.datapre.index,
-                "coeffs": self.labels,
-            },
-        )
-        self.pre_y = xr.DataArray(
-            self.pre_y,  # Keep 2D shape
-            dims=["obs_ind", "treated_units"],
-            coords={"obs_ind": self.datapre.index, "treated_units": ["unit_0"]},
-        )
-        self.post_X = xr.DataArray(
-            self.post_X,
-            dims=["obs_ind", "coeffs"],
-            coords={
-                "obs_ind": self.datapost.index,
-                "coeffs": self.labels,
-            },
+        """Bundle design matrices into ``xr.Dataset`` objects for pre and post periods."""
+        self.pre_design = self._build_design_dataset(
+            self._pre_X_raw,
+            self._pre_y_raw,
+            obs_ind=self.datapre.index,
+            coeffs=self.labels,
         )
-        self.post_y = xr.DataArray(
-            self.post_y,  # Keep 2D shape
-            dims=["obs_ind", "treated_units"],
-            coords={"obs_ind": self.datapost.index, "treated_units": ["unit_0"]},
+        self.post_design = self._build_design_dataset(
+            self._post_X_raw,
+            self._post_y_raw,
+            obs_ind=self.datapost.index,
+            coeffs=self.labels,
         )
+        del self._pre_X_raw, self._pre_y_raw, self._post_X_raw, self._post_y_raw
 
     def algorithm(self) -> None:
         """Run the experiment algorithm: fit model, predict, and calculate causal impact."""
-        # fit the model to the observed (pre-intervention) data
-        # All PyMC models now accept xr.DataArray with consistent API
+        pre_X = self.pre_design["X"]
+        pre_y = self.pre_design["y"]
+        post_X = self.post_design["X"]
+        post_y = self.post_design["y"]
+
         if isinstance(self.model, PyMCModel):
             COORDS: dict[str, Any] = {
                 "coeffs": self.labels,
-                "obs_ind": np.arange(self.pre_X.shape[0]),
+                "obs_ind": np.arange(pre_X.shape[0]),
                 "treated_units": ["unit_0"],
-                "datetime_index": self.datapre.index,  # For time series models
+                "datetime_index": self.datapre.index,
             }
-            self.model.fit(X=self.pre_X, y=self.pre_y, coords=COORDS)
+            self.model.fit(X=pre_X, y=pre_y, coords=COORDS)
         elif isinstance(self.model, RegressorMixin):
-            # For OLS models, use 1D y data
-            self.model.fit(X=self.pre_X, y=self.pre_y.isel(treated_units=0))
+            self.model.fit(X=pre_X, y=pre_y.isel(treated_units=0))
         else:
             raise ValueError("Model type not recognized")
 
-        # score the goodness of fit to the pre-intervention data
         if isinstance(self.model, PyMCModel):
-            self.score = self.model.score(X=self.pre_X, y=self.pre_y)
+            self.score = self.model.score(X=pre_X, y=pre_y)
         elif isinstance(self.model, RegressorMixin):
-            self.score = self.model.score(
-                X=self.pre_X, y=self.pre_y.isel(treated_units=0)
-            )
+            self.score = self.model.score(X=pre_X, y=pre_y.isel(treated_units=0))
 
-        # get the model predictions of the observed (pre-intervention) data
         if isinstance(self.model, PyMCModel | RegressorMixin):
-            self.pre_pred = self.model.predict(X=self.pre_X)
+            self.pre_pred = self.model.predict(X=pre_X)
 
-        # calculate the counterfactual (post period)
         if isinstance(self.model, PyMCModel):
-            self.post_pred = self.model.predict(X=self.post_X, out_of_sample=True)
+            self.post_pred = self.model.predict(X=post_X, out_of_sample=True)
         elif isinstance(self.model, RegressorMixin):
-            self.post_pred = self.model.predict(X=self.post_X)
+            self.post_pred = self.model.predict(X=post_X)
 
-        # calculate impact - all PyMC models now use 2D data with treated_units
         if isinstance(self.model, PyMCModel):
-            self.pre_impact = self.model.calculate_impact(self.pre_y, self.pre_pred)
-            self.post_impact = self.model.calculate_impact(self.post_y, self.post_pred)
+            self.pre_impact = self.model.calculate_impact(pre_y, self.pre_pred)
+            self.post_impact = self.model.calculate_impact(post_y, self.post_pred)
         elif isinstance(self.model, RegressorMixin):
-            # SKL models work with 1D data
             self.pre_impact = self.model.calculate_impact(
-                self.pre_y.isel(treated_units=0), self.pre_pred
+                pre_y.isel(treated_units=0), self.pre_pred
             )
             self.post_impact = self.model.calculate_impact(
-                self.post_y.isel(treated_units=0), self.post_pred
+                post_y.isel(treated_units=0), self.post_pred
             )
 
         self.post_impact_cumulative = self.model.calculate_cumulative_impact(
@@ -625,9 +610,7 @@ def _bayesian_plot(
 
         (h,) = ax[0].plot(
             self.datapre.index,
-            self.pre_y.isel(treated_units=0)
-            if hasattr(self.pre_y, "isel")
-            else self.pre_y[:, 0],
+            self.pre_design["y"].isel(treated_units=0),
             "k.",
             label="Observations",
         )
@@ -652,9 +635,7 @@ def _bayesian_plot(
 
         ax[0].plot(
             self.datapost.index,
-            self.post_y.isel(treated_units=0)
-            if hasattr(self.post_y, "isel")
-            else self.post_y[:, 0],
+            self.post_design["y"].isel(treated_units=0),
             "k.",
         )
         # Shaded causal effect
@@ -667,9 +648,7 @@ def _bayesian_plot(
         h = ax[0].fill_between(
             self.datapost.index,
             y1=post_pred_mu,
-            y2=self.post_y.isel(treated_units=0)
-            if hasattr(self.post_y, "isel")
-            else self.post_y[:, 0],
+            y2=self.post_design["y"].isel(treated_units=0),
             color="C0",
             alpha=0.25,
         )
@@ -805,10 +784,10 @@ def _ols_plot(
 
         fig, ax = plt.subplots(3, 1, sharex=True, figsize=(7, 8))
 
-        ax[0].plot(self.datapre.index, self.pre_y, "k.")
+        ax[0].plot(self.datapre.index, self.pre_design["y"], "k.")
         ax[0].plot(self.datapre.index, self.pre_pred, c="k", label="model fit")
 
-        ax[0].plot(self.datapost.index, self.post_y, "k.")
+        ax[0].plot(self.datapost.index, self.post_design["y"], "k.")
         ax[0].plot(
             self.datapost.index,
             self.post_pred,
@@ -820,7 +799,7 @@ def _ols_plot(
         ax[0].fill_between(
             self.datapost.index,
             y1=np.squeeze(self.post_pred),
-            y2=np.squeeze(self.post_y),
+            y2=np.squeeze(self.post_design["y"]),
             color="C0",
             alpha=0.25,
             label="Causal impact",