From 441bb1b6c29fda13aac4e7ed6841252826a56d49 Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Fri, 1 May 2026 20:56:37 +0100 Subject: [PATCH 1/8] Broaden numpydoc-validation scope across causalpy/ (#898) Replace the ``.plot``-only exclude regex in ``[tool.numpydoc_validation]`` with a private-member exclude (``\\._``), and widen the pre-commit ``files`` pattern from ``^causalpy/experiments/.*\\.py$`` to ``^causalpy/(?!tests/|data/).*\\.py$`` so the existing ``PR01``/``PR02`` checks now apply package-wide. Document and fix every public-API ``PR01``/``PR02`` violation that surfaces under the new scope: convert remaining Sphinx ``:param:`` blocks to numpydoc ``Parameters`` sections, add missing ``**kwargs``/``*args`` rows, populate the protocol/check/maketables/skl helpers, and tidy a ``See Also`` entry in ``utils.py`` that previously broke the docstring parser. The simulated-data helpers under ``causalpy/data/`` are intentionally excluded for this round; they keep their didactic ``:param`` style. 
Co-authored-by: Cursor --- .pre-commit-config.yaml | 17 +- causalpy/checks/bandwidth.py | 18 +- causalpy/checks/base.py | 17 ++ causalpy/checks/convex_hull.py | 18 +- causalpy/checks/leave_one_out.py | 18 +- causalpy/checks/mccrary.py | 18 +- causalpy/checks/persistence.py | 19 +- causalpy/checks/placebo_in_space.py | 18 +- causalpy/checks/placebo_in_time.py | 15 ++ causalpy/checks/pre_treatment_placebo.py | 18 +- causalpy/checks/prior_sensitivity.py | 18 +- causalpy/custom_exceptions.py | 24 ++- causalpy/experiments/base.py | 36 +++- causalpy/experiments/diff_in_diff.py | 12 +- causalpy/experiments/instrumental_variable.py | 32 +++- .../experiments/interrupted_time_series.py | 29 ++- .../inverse_propensity_weighting.py | 2 + causalpy/experiments/panel_regression.py | 37 ++++ causalpy/experiments/piecewise_its.py | 26 ++- causalpy/experiments/prepostnegd.py | 40 ++-- .../experiments/regression_discontinuity.py | 61 +++--- causalpy/experiments/regression_kink.py | 44 +++-- causalpy/experiments/staggered_did.py | 5 + causalpy/experiments/synthetic_control.py | 61 ++++-- causalpy/maketables_adapters.py | 134 ++++++++++++-- causalpy/pipeline.py | 35 +++- causalpy/pymc_models.py | 175 +++++++++++++++--- causalpy/skl_models.py | 56 +++++- causalpy/steps/estimate_effect.py | 12 ++ causalpy/steps/report.py | 22 ++- causalpy/steps/sensitivity.py | 39 +++- causalpy/transforms.py | 88 ++++++--- causalpy/utils.py | 9 +- pyproject.toml | 16 +- 34 files changed, 988 insertions(+), 201 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a5f7d5c68..ce79564cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,17 +56,20 @@ repos: additional_dependencies: # Support pyproject.toml configuration - tomli - # Validate that every public ``plot()`` override on a BaseExperiment subclass - # keeps its numpydoc Parameters block in sync with the function signature - # (issue #886). 
Configuration lives under [tool.numpydoc_validation] in - # pyproject.toml and is intentionally narrow: only ``.plot`` methods are - # checked, only PR01/PR02 are enforced, and the base ``BaseExperiment.plot`` - # is excluded because it uses ``*args, **kwargs`` for dispatch. + # Validate numpydoc-style docstrings across the package. The check set and + # exclude regex are configured under [tool.numpydoc_validation] in + # pyproject.toml. Originally introduced under #886 as a narrow check on + # public ``.plot`` overrides, the scope was widened under #898 to cover the + # whole ``causalpy`` package. The ``files`` pattern below excludes the test + # suite and the simulated-data helpers in ``causalpy/data/`` (those use the + # Sphinx ``:param:`` style for didactic clarity and are not part of the + # public API surface). Private members are excluded via the config-level + # regex in ``pyproject.toml``. - repo: https://github.com/numpy/numpydoc rev: v1.10.0 hooks: - id: numpydoc-validation - files: ^causalpy/experiments/.*\.py$ + files: ^causalpy/(?!tests/|data/).*\.py$ - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.20.1 hooks: diff --git a/causalpy/checks/bandwidth.py b/causalpy/checks/bandwidth.py index a3d24ced7..6212e54c4 100644 --- a/causalpy/checks/bandwidth.py +++ b/causalpy/checks/bandwidth.py @@ -60,7 +60,13 @@ def __init__(self, bandwidths: list[float] | None = None) -> None: self.bandwidths = bandwidths or [0.25, 0.5, 1.0, 2.0, np.inf] def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is an RD or RKink instance.""" + """Verify the experiment is an RD or RKink instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. 
+ """ if not isinstance(experiment, (RegressionDiscontinuity, RegressionKink)): raise TypeError( "BandwidthSensitivity requires a RegressionDiscontinuity " @@ -72,7 +78,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Re-fit the experiment at multiple bandwidths and compare estimates.""" + """Re-fit the experiment at multiple bandwidths and compare estimates. + + Parameters + ---------- + experiment : BaseExperiment + The fitted RD or RKink experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/checks/base.py b/causalpy/checks/base.py index dfb5f8036..5c238a2cc 100644 --- a/causalpy/checks/base.py +++ b/causalpy/checks/base.py @@ -37,6 +37,17 @@ def clone_model(model: Any) -> Any: PyMC models cannot survive ``copy.deepcopy`` (the class identity is lost), so we use their ``_clone()`` method instead. For all other model types we fall back to ``copy.deepcopy``. + + Parameters + ---------- + model : Any + The model instance to clone. PyMC models must expose a ``_clone()`` + method; everything else falls back to :func:`copy.deepcopy`. + + Returns + ------- + Any + A copy of ``model``: ``model._clone()`` if available, else a deep copy. """ if hasattr(model, "_clone"): return model._clone() @@ -88,6 +99,12 @@ class Check(Protocol): def validate(self, experiment: BaseExperiment) -> None: """Verify the check is applicable to the given experiment. + Parameters + ---------- + experiment : BaseExperiment + The experiment instance whose type is checked against + ``applicable_methods``. 
+ Raises ------ TypeError diff --git a/causalpy/checks/convex_hull.py b/causalpy/checks/convex_hull.py index 8a7e2349a..6e5fcca95 100644 --- a/causalpy/checks/convex_hull.py +++ b/causalpy/checks/convex_hull.py @@ -39,7 +39,13 @@ class ConvexHullCheck: applicable_methods: set[type[BaseExperiment]] = {SyntheticControl} def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a SyntheticControl instance.""" + """Verify the experiment is a SyntheticControl instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, SyntheticControl): raise TypeError("ConvexHullCheck requires a SyntheticControl experiment.") @@ -48,7 +54,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Run the convex hull violation check on pre-treatment data.""" + """Run the convex hull violation check on pre-treatment data. + + Parameters + ---------- + experiment : BaseExperiment + The fitted SyntheticControl experiment. + context : PipelineContext + Pipeline context (unused; required by the check protocol). + """ sc = experiment datapre_control = sc.datapre_control # type: ignore[attr-defined] datapre_treated = sc.datapre_treated # type: ignore[attr-defined] diff --git a/causalpy/checks/leave_one_out.py b/causalpy/checks/leave_one_out.py index 2f20ac1e5..a38a63b4d 100644 --- a/causalpy/checks/leave_one_out.py +++ b/causalpy/checks/leave_one_out.py @@ -48,7 +48,13 @@ class LeaveOneOut: applicable_methods: set[type[BaseExperiment]] = {SyntheticControl} def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a SyntheticControl instance.""" + """Verify the experiment is a SyntheticControl instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. 
+ """ if not isinstance(experiment, SyntheticControl): raise TypeError("LeaveOneOut requires a SyntheticControl experiment.") @@ -57,7 +63,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Drop each control unit in turn and compare effect estimates.""" + """Drop each control unit in turn and compare effect estimates. + + Parameters + ---------- + experiment : BaseExperiment + The fitted SyntheticControl experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/checks/mccrary.py b/causalpy/checks/mccrary.py index 31a68f504..9a6dfd722 100644 --- a/causalpy/checks/mccrary.py +++ b/causalpy/checks/mccrary.py @@ -62,7 +62,13 @@ def __init__(self, n_bins: int = 20, alpha: float = 0.05) -> None: self.alpha = alpha def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a RegressionDiscontinuity instance.""" + """Verify the experiment is a RegressionDiscontinuity instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, RegressionDiscontinuity): raise TypeError( "McCraryDensityTest requires a RegressionDiscontinuity experiment." @@ -73,7 +79,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Test for manipulation of the running variable at the threshold.""" + """Test for manipulation of the running variable at the threshold. + + Parameters + ---------- + experiment : BaseExperiment + The fitted RegressionDiscontinuity experiment. + context : PipelineContext + Pipeline context (unused; required by the check protocol). 
+ """ rd = experiment threshold = rd.treatment_threshold # type: ignore[attr-defined] running_var = rd.running_variable_name # type: ignore[attr-defined] diff --git a/causalpy/checks/persistence.py b/causalpy/checks/persistence.py index acd871888..9f1d887b3 100644 --- a/causalpy/checks/persistence.py +++ b/causalpy/checks/persistence.py @@ -58,7 +58,13 @@ def __init__( self.direction = direction def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a three-period ITS with treatment_end_time.""" + """Verify the experiment is a three-period ITS with treatment_end_time. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, InterruptedTimeSeries): raise TypeError( "PersistenceCheck requires an InterruptedTimeSeries experiment." @@ -77,7 +83,16 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Run persistence analysis and report whether the effect decays.""" + """Run persistence analysis and report whether the effect decays. + + Parameters + ---------- + experiment : BaseExperiment + The fitted three-period ITS experiment. + context : PipelineContext + Pipeline context (unused by this check; required by the + :class:`~causalpy.checks.base.Check` protocol). + """ its: Any = experiment persistence = its.analyze_persistence( hdi_prob=self.hdi_prob, diff --git a/causalpy/checks/placebo_in_space.py b/causalpy/checks/placebo_in_space.py index 7ffb1281c..6e17b3950 100644 --- a/causalpy/checks/placebo_in_space.py +++ b/causalpy/checks/placebo_in_space.py @@ -50,7 +50,13 @@ class PlaceboInSpace: applicable_methods: set[type[BaseExperiment]] = {SyntheticControl} def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a SyntheticControl instance.""" + """Verify the experiment is a SyntheticControl instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. 
+ """ if not isinstance(experiment, SyntheticControl): raise TypeError("PlaceboInSpace requires a SyntheticControl experiment.") @@ -59,7 +65,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Treat each control unit as treated and compare effect magnitudes.""" + """Treat each control unit as treated and compare effect magnitudes. + + Parameters + ---------- + experiment : BaseExperiment + The fitted SyntheticControl experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/checks/placebo_in_time.py b/causalpy/checks/placebo_in_time.py index 5730ddc1b..b63d0eb72 100644 --- a/causalpy/checks/placebo_in_time.py +++ b/causalpy/checks/placebo_in_time.py @@ -213,6 +213,11 @@ def __init__( def validate(self, experiment: BaseExperiment) -> None: """Check the experiment is compatible with PlaceboInTime. + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + Raises ------ TypeError @@ -553,6 +558,16 @@ def run( Can be used standalone (``context=None``) when ``experiment_factory`` was provided, or within a pipeline. + Parameters + ---------- + experiment : BaseExperiment + The fitted experiment whose treatment time will be shifted to + generate placebo folds. + context : PipelineContext or None, default None + Pipeline context providing ``experiment_config`` for re-fits. + If ``None``, an explicit ``experiment_factory`` must have been + supplied at construction time. 
+ Returns ------- CheckResult diff --git a/causalpy/checks/pre_treatment_placebo.py b/causalpy/checks/pre_treatment_placebo.py index 723cad9e5..7e872bbe0 100644 --- a/causalpy/checks/pre_treatment_placebo.py +++ b/causalpy/checks/pre_treatment_placebo.py @@ -49,7 +49,13 @@ def __init__(self, threshold: float = 0.05) -> None: self.threshold = threshold def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a fitted StaggeredDifferenceInDifferences.""" + """Verify the experiment is a fitted StaggeredDifferenceInDifferences. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, StaggeredDifferenceInDifferences): raise TypeError( "PreTreatmentPlaceboCheck requires a " @@ -66,7 +72,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Evaluate pre-treatment event-study ATTs for evidence of pre-trends.""" + """Evaluate pre-treatment event-study ATTs for evidence of pre-trends. + + Parameters + ---------- + experiment : BaseExperiment + The fitted StaggeredDifferenceInDifferences experiment. + context : PipelineContext + Pipeline context (unused; required by the check protocol). + """ sdid = experiment att_et = sdid.att_event_time_ # type: ignore[attr-defined] diff --git a/causalpy/checks/prior_sensitivity.py b/causalpy/checks/prior_sensitivity.py index 965c11932..38136bac8 100644 --- a/causalpy/checks/prior_sensitivity.py +++ b/causalpy/checks/prior_sensitivity.py @@ -93,7 +93,13 @@ def __init__(self, alternatives: list[dict[str, Any]]) -> None: self.alternatives = alternatives def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment uses a Bayesian (PyMC) model.""" + """Verify the experiment uses a Bayesian (PyMC) model. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. 
+ """ if not isinstance(experiment.model, PyMCModel): raise TypeError( "PriorSensitivity requires a Bayesian (PyMC) model. " @@ -105,7 +111,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Re-fit with each alternative model and compare effect estimates.""" + """Re-fit with each alternative model and compare effect estimates. + + Parameters + ---------- + experiment : BaseExperiment + The fitted Bayesian experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/custom_exceptions.py b/causalpy/custom_exceptions.py index f61170b9e..039e268ef 100644 --- a/causalpy/custom_exceptions.py +++ b/causalpy/custom_exceptions.py @@ -18,7 +18,13 @@ class BadIndexException(Exception): """Custom exception used when we have a mismatch in types between the dataframe - index and an event, typically a treatment or intervention.""" + index and an event, typically a treatment or intervention. + + Parameters + ---------- + message : str + Human-readable description of the index mismatch. + """ def __init__(self, message: str): super().__init__(message) @@ -27,7 +33,13 @@ def __init__(self, message: str): class FormulaException(Exception): """Exception raised given when there is some error in a user-provided model - formula""" + formula. + + Parameters + ---------- + message : str + Human-readable description of the formula problem. + """ def __init__(self, message: str): super().__init__(message) @@ -35,7 +47,13 @@ def __init__(self, message: str): class DataException(Exception): - """Exception raised given when there is some error in user-provided dataframe""" + """Exception raised when there is some error in a user-provided dataframe. + + Parameters + ---------- + message : str + Human-readable description of the data problem. 
+ """ def __init__(self, message: str): super().__init__(message) diff --git a/causalpy/experiments/base.py b/causalpy/experiments/base.py index 375873e56..b217436d2 100644 --- a/causalpy/experiments/base.py +++ b/causalpy/experiments/base.py @@ -98,6 +98,12 @@ class BaseExperiment(ABC): (e.g. ``LinearRegression``) so that ``model=None`` instantiates a sensible Bayesian default. To use an OLS/sklearn model, pass one explicitly. + Parameters + ---------- + model : PyMCModel, RegressorMixin, or None, default None + Model instance to use. If ``None`` and ``_default_model_class`` is set, + an instance of that default class is constructed. + Notes ----- Optional ``maketables`` integration is exposed through ``__maketables_*`` @@ -308,6 +314,13 @@ def get_plot_data(self, *args: Any, **kwargs: Any) -> pd.DataFrame: Internally, this function dispatches to either :func:`get_plot_data_bayesian` or :func:`get_plot_data_ols` depending on the model type. + + Parameters + ---------- + *args + Positional arguments forwarded to the model-specific implementation. + **kwargs + Keyword arguments forwarded to the model-specific implementation. """ if isinstance(self.model, PyMCModel): return self.get_plot_data_bayesian(*args, **kwargs) @@ -317,11 +330,27 @@ def get_plot_data(self, *args: Any, **kwargs: Any) -> pd.DataFrame: raise ValueError("Unsupported model type") def get_plot_data_bayesian(self, *args: Any, **kwargs: Any) -> pd.DataFrame: - """Return plot data for Bayesian models. Override in subclasses that support Bayesian.""" + """Return plot data for Bayesian models. Override in subclasses that support Bayesian. + + Parameters + ---------- + *args + Positional arguments forwarded to the subclass implementation. + **kwargs + Keyword arguments forwarded to the subclass implementation. + """ raise NotImplementedError("get_plot_data_bayesian method not yet implemented") def get_plot_data_ols(self, *args: Any, **kwargs: Any) -> pd.DataFrame: - """Return plot data for OLS models. 
Override in subclasses that support OLS.""" + """Return plot data for OLS models. Override in subclasses that support OLS. + + Parameters + ---------- + *args + Positional arguments forwarded to the subclass implementation. + **kwargs + Keyword arguments forwarded to the subclass implementation. + """ raise NotImplementedError("get_plot_data_ols method not yet implemented") @abstractmethod @@ -383,6 +412,9 @@ def effect_summary( prefix : str, optional Prefix for prose generation (e.g., "During intervention", "Post-intervention"). Defaults to "Post-period". + **kwargs + Reserved for forward-compatibility; subclasses may consume + additional keyword arguments. Returns ------- diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py index 269b7e9bd..350cfba22 100644 --- a/causalpy/experiments/diff_in_diff.py +++ b/causalpy/experiments/diff_in_diff.py @@ -74,6 +74,8 @@ class DifferenceInDifferences(BaseExperiment): Defaults to "post_treatment". model : PyMCModel or RegressorMixin, optional A PyMC model for difference in differences. Defaults to LinearRegression. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -320,8 +322,11 @@ def _validate_formula_interaction_terms(self) -> None: def summary(self, round_to: int | None = 2) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print(f"{self.expt_type:=^80}") print(f"Formula: {self.formula}") @@ -671,6 +676,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). 
+ **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/instrumental_variable.py b/causalpy/experiments/instrumental_variable.py index e724f59a6..67bc0faa3 100644 --- a/causalpy/experiments/instrumental_variable.py +++ b/causalpy/experiments/instrumental_variable.py @@ -65,6 +65,8 @@ class InstrumentalVariable(BaseExperiment): A indicator for whether the treatment to be modelled is binary or not. Determines which PyMC model we use to model the joint outcome and treatment. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -291,8 +293,11 @@ def plot( def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ raise NotImplementedError("Summary method not implemented.") @@ -314,6 +319,29 @@ def effect_summary( Generate a decision-ready summary of causal effects. Note: effect_summary is not yet implemented for InstrumentalVariable experiments. + + Parameters + ---------- + window : str, tuple, or slice, default "post" + Time window for analysis (unused for InstrumentalVariable). + direction : {"increase", "decrease", "two-sided"}, default "increase" + Direction for tail probability calculation. + alpha : float, default 0.05 + Significance level for HDI/CI intervals. + cumulative : bool, default True + Whether to include cumulative effect statistics. + relative : bool, default True + Whether to include relative effect statistics. + min_effect : float, optional + Region of Practical Equivalence (ROPE) threshold. + treated_unit : str, optional + For multi-unit experiments, the unit to analyse. 
+ period : {"intervention", "post", "comparison"}, optional + Period selector for three-period designs. + prefix : str, default "Post-period" + Prefix for prose generation. + **kwargs + Reserved for forward-compatibility. """ raise NotImplementedError( "effect_summary is not yet implemented for InstrumentalVariable experiments." diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py index 97311dbbd..111a2e83d 100644 --- a/causalpy/experiments/interrupted_time_series.py +++ b/causalpy/experiments/interrupted_time_series.py @@ -262,7 +262,17 @@ def input_validation( treatment_time: int | float | pd.Timestamp, treatment_end_time: int | float | pd.Timestamp | None = None, ) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness. + + Parameters + ---------- + data : pd.DataFrame + The experiment data. + treatment_time : int, float, or pd.Timestamp + Start of the treatment period. + treatment_end_time : int, float, pd.Timestamp, or None, default None + Optional end of the treatment period for three-period designs. + """ if isinstance(data.index, pd.DatetimeIndex) and not isinstance( treatment_time, pd.Timestamp ): @@ -591,8 +601,11 @@ def _comparison_period_summary( def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print(f"{self.expt_type:=^80}") print(f"Formula: {self.formula}") @@ -965,8 +978,11 @@ def get_plot_data_bayesian(self, hdi_prob: float = HDI_PROB) -> pd.DataFrame: """ Recover the data of the experiment along with the prediction and causal impact information. 
- :param hdi_prob: - Prob for which the highest density interval will be computed. The default value is defined as the default from the :func:`arviz.hdi` function. + Parameters + ---------- + hdi_prob : float, default :data:`~causalpy.constants.HDI_PROB` + Probability mass of the highest density interval. Defaults to the + project-wide :data:`~causalpy.constants.HDI_PROB` (currently 0.94). """ if isinstance(self.model, PyMCModel): hdi_pct = int(round(hdi_prob * 100)) @@ -1321,6 +1337,9 @@ def effect_summary( prefix : str, optional Prefix for prose generation (e.g., "During intervention", "Post-intervention"). Defaults to "Post-period". + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/inverse_propensity_weighting.py b/causalpy/experiments/inverse_propensity_weighting.py index 0c92b2ba5..107c17f67 100644 --- a/causalpy/experiments/inverse_propensity_weighting.py +++ b/causalpy/experiments/inverse_propensity_weighting.py @@ -51,6 +51,8 @@ class InversePropensityWeighting(BaseExperiment): of these weighting schemes. model : PropensityScore, optional A PyMC model. Defaults to PropensityScore. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- diff --git a/causalpy/experiments/panel_regression.py b/causalpy/experiments/panel_regression.py index 0ec4fc472..815f37fa1 100644 --- a/causalpy/experiments/panel_regression.py +++ b/causalpy/experiments/panel_regression.py @@ -68,6 +68,8 @@ class PanelRegression(BaseExperiment): but doesn't directly estimate individual unit effects. model : PyMCModel or RegressorMixin, optional A PyMC (Bayesian) or sklearn (OLS) model. If None, a model must be provided. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Attributes ---------- @@ -468,6 +470,29 @@ def effect_summary( so the standard ITS/SC-style effect summary does not directly apply. 
Use :meth:`summary` for coefficient-level inference. + Parameters + ---------- + window : str, tuple, or slice, default "post" + Time window for analysis (placeholder; not consumed). + direction : {"increase", "decrease", "two-sided"}, default "increase" + Direction for tail probability calculation. + alpha : float, default 0.05 + Significance level for HDI/CI intervals. + cumulative : bool, default True + Whether to include cumulative effect statistics. + relative : bool, default True + Whether to include relative effect statistics. + min_effect : float, optional + Region of Practical Equivalence (ROPE) threshold. + treated_unit : str, optional + Treated unit selector for multi-unit experiments. + period : {"intervention", "post", "comparison"}, optional + Period selector for three-period designs. + prefix : str, default "Post-period" + Prefix for prose generation. + **kwargs + Reserved for forward-compatibility. + Raises ------ NotImplementedError @@ -604,6 +629,12 @@ def _plot_coefficients_internal( def get_plot_data_bayesian(self, **kwargs: Any) -> pd.DataFrame: """Get plot data for Bayesian model. + Parameters + ---------- + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. + Returns ------- pd.DataFrame @@ -636,6 +667,12 @@ def get_plot_data_bayesian(self, **kwargs: Any) -> pd.DataFrame: def get_plot_data_ols(self, **kwargs: Any) -> pd.DataFrame: """Get plot data for OLS model. + Parameters + ---------- + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. 
+ Returns ------- pd.DataFrame diff --git a/causalpy/experiments/piecewise_its.py b/causalpy/experiments/piecewise_its.py index 99e33784d..724c38efe 100644 --- a/causalpy/experiments/piecewise_its.py +++ b/causalpy/experiments/piecewise_its.py @@ -813,7 +813,31 @@ def effect_summary( prefix: str = "Post-period", **kwargs: Any, ) -> EffectSummary: - """Generate a decision-ready summary of PiecewiseITS causal effects.""" + """Generate a decision-ready summary of PiecewiseITS causal effects. + + Parameters + ---------- + window : str, tuple, or slice, default "post" + Time window for analysis (see :meth:`BaseExperiment.effect_summary`). + direction : {"increase", "decrease", "two-sided"}, default "increase" + Direction for tail probability calculation (PyMC only). + alpha : float, default 0.05 + Significance level for HDI/CI intervals (1-alpha confidence). + cumulative : bool, default True + Whether to include cumulative effect statistics. + relative : bool, default True + Whether to include relative effect statistics. + min_effect : float, optional + Region of Practical Equivalence (ROPE) threshold (PyMC only). + treated_unit : str, optional + For multi-unit experiments, the unit to analyse. + period : None + Not supported by PiecewiseITS; pass ``None``. + prefix : str, default "Post-period" + Prefix for prose generation. + **kwargs + Reserved for forward-compatibility. 
+ """ from causalpy.reporting import ( _compute_statistics, _compute_statistics_ols, diff --git a/causalpy/experiments/prepostnegd.py b/causalpy/experiments/prepostnegd.py index 3c5fda665..ece660c06 100644 --- a/causalpy/experiments/prepostnegd.py +++ b/causalpy/experiments/prepostnegd.py @@ -40,19 +40,23 @@ class PrePostNEGD(BaseExperiment): """ - A class to analyse data from pretest/posttest designs - - :param data: - A pandas dataframe - :param formula: - A statistical model formula - :param group_variable_name: - Name of the column in data for the group variable, should be either - binary or boolean - :param pretreatment_variable_name: - Name of the column in data for the pretreatment variable - :param model: - A PyMC model. Defaults to LinearRegression. + A class to analyse data from pretest/posttest designs. + + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. + formula : str + A statistical model formula. + group_variable_name : str + Name of the column in ``data`` for the group variable; should be + either binary or boolean. + pretreatment_variable_name : str + Name of the column in ``data`` for the pretreatment variable. + model : PyMCModel, optional + A PyMC model. Defaults to :class:`LinearRegression`. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -226,8 +230,11 @@ def _causal_impact_summary_stat(self, round_to: int | None = 2) -> str: def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print(f"{self.expt_type:=^80}") print(f"Formula: {self.formula}") @@ -385,6 +392,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). 
min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/regression_discontinuity.py b/causalpy/experiments/regression_discontinuity.py index 516c9d731..17e28d658 100644 --- a/causalpy/experiments/regression_discontinuity.py +++ b/causalpy/experiments/regression_discontinuity.py @@ -46,28 +46,33 @@ class RegressionDiscontinuity(BaseExperiment): """ A class to analyse sharp regression discontinuity experiments. - :param data: - A pandas dataframe - :param formula: - A statistical model formula - :param treatment_threshold: - A scalar threshold value at which the treatment is applied - :param model: - A PyMC or sklearn model. Defaults to LinearRegression. - :param running_variable_name: - The name of the predictor variable that the treatment threshold is based upon - :param epsilon: - A small scalar value which determines how far above and below the treatment - threshold to evaluate the causal impact. - :param bandwidth: - Data outside of the bandwidth (relative to the discontinuity) is not used to fit - the model. - :param donut_hole: - Observations within this distance from the treatment threshold are excluded from - model fitting. Used as a robustness check when observations closest to the - threshold may be problematic (e.g., due to manipulation or heaping). Defaults - to 0.0 (no exclusion). Must be non-negative and less than bandwidth if bandwidth - is finite. + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. + formula : str + A statistical model formula. + treatment_threshold : float + A scalar threshold value at which the treatment is applied. + model : PyMCModel, RegressorMixin, or None, default None + A PyMC or sklearn model. Defaults to :class:`LinearRegression`. 
+ running_variable_name : str, default "x" + The name of the predictor variable that the treatment threshold is + based upon. + epsilon : float, default 0.001 + A small scalar value which determines how far above and below the + treatment threshold to evaluate the causal impact. + bandwidth : float, default np.inf + Data outside of the bandwidth (relative to the discontinuity) is not + used to fit the model. + donut_hole : float, default 0.0 + Observations within this distance from the treatment threshold are + excluded from model fitting. Used as a robustness check when + observations closest to the threshold may be problematic (e.g., due + to manipulation or heaping). Must be non-negative and less than + ``bandwidth`` if ``bandwidth`` is finite. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -278,10 +283,13 @@ def _is_treated(self, x: np.ndarray | pd.Series) -> np.ndarray: def summary(self, round_to: int | None = None) -> None: """ - Print summary of main results and model coefficients + Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers. + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print("Regression Discontinuity experiment") print(f"Formula: {self.formula}") @@ -546,6 +554,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. 
Returns ------- diff --git a/causalpy/experiments/regression_kink.py b/causalpy/experiments/regression_kink.py index b037af56a..d107805ec 100644 --- a/causalpy/experiments/regression_kink.py +++ b/causalpy/experiments/regression_kink.py @@ -44,21 +44,25 @@ class RegressionKink(BaseExperiment): """A class to analyse regression kink designs. - :param data: - A pandas dataframe - :param formula: - A statistical model formula - :param kink_point: - A scalar value at which the kink occurs - :param model: - A PyMC model. Defaults to LinearRegression. - :param running_variable_name: - The name of the running variable column - :param epsilon: - A small scalar for evaluating the causal impact above/below the kink - :param bandwidth: - Data outside of the bandwidth (relative to the kink) is not used to fit - the model. + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. + formula : str + A statistical model formula. + kink_point : float + A scalar value at which the kink occurs. + model : PyMCModel, optional + A PyMC model. Defaults to :class:`LinearRegression`. + running_variable_name : str, default "x" + The name of the running variable column. + epsilon : float, default 0.001 + A small scalar for evaluating the causal impact above/below the kink. + bandwidth : float, default np.inf + Data outside of the bandwidth (relative to the kink) is not used to + fit the model. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. """ supports_ols = False @@ -229,8 +233,11 @@ def _is_treated(self, x: np.ndarray | pd.Series) -> np.ndarray: def summary(self, round_to: int | None = 2) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. 
""" print( f""" @@ -389,6 +396,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/staggered_did.py b/causalpy/experiments/staggered_did.py index 2bfb0bab0..06b047143 100644 --- a/causalpy/experiments/staggered_did.py +++ b/causalpy/experiments/staggered_did.py @@ -86,6 +86,8 @@ class StaggeredDifferenceInDifferences(BaseExperiment): reference_event_time : int, optional Event-time index associated with plots (reserved for future use). Defaults to -1. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Attributes ---------- @@ -1039,6 +1041,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/synthetic_control.py b/causalpy/experiments/synthetic_control.py index 7ccc57b57..a69e2e5b4 100644 --- a/causalpy/experiments/synthetic_control.py +++ b/causalpy/experiments/synthetic_control.py @@ -39,23 +39,27 @@ class SyntheticControl(BaseExperiment): """The class for the synthetic control experiment. - :param data: - A pandas dataframe - :param treatment_time: - The time when treatment occurred, should be in reference to the data index - :param control_units: - A list of control units to be used in the experiment - :param treated_units: - A list of treated units to be used in the experiment - :param model: - A PyMC or sklearn model. Defaults to WeightedSumFitter. - :param min_donor_correlation: + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. 
+ treatment_time : int, float, or pd.Timestamp + The time when treatment occurred, in reference to the data index. + control_units : list of str + A list of control units to be used in the experiment. + treated_units : list of str + A list of treated units to be used in the experiment. + model : PyMCModel, RegressorMixin, or None, default None + A PyMC or sklearn model. Defaults to :class:`WeightedSumFitter`. + min_donor_correlation : float, default 0.0 Minimum acceptable Pearson correlation between each control unit and treated unit in the pre-treatment period. Control units below this threshold trigger a ``UserWarning``. Defaults to ``0.0`` (warn on negatively correlated donors). + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. - Example + Examples -------- >>> import causalpy as cp >>> df = cp.load_data("sc") @@ -311,7 +315,15 @@ def algorithm(self) -> None: def input_validation( self, data: pd.DataFrame, treatment_time: int | float | pd.Timestamp ) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness. + + Parameters + ---------- + data : pd.DataFrame + The experiment data. + treatment_time : int, float, or pd.Timestamp + The treatment time, expected to be compatible with ``data.index``. + """ if isinstance(data.index, pd.DatetimeIndex) and not isinstance( treatment_time, pd.Timestamp ): @@ -352,8 +364,11 @@ def _pre_treatment_correlations(self) -> dict[str, float]: def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. 
""" print(f"{self.expt_type:=^80}") print(f"Control units: {self.control_units}") @@ -759,11 +774,14 @@ def get_plot_data_bayesian( """ Recover the data of the PrePostFit experiment along with the prediction and causal impact information. - :param hdi_prob: - Prob for which the highest density interval will be computed. The default value is defined as the default from the :func:`arviz.hdi` function. - :param treated_unit: - Which treated unit to extract data for. Must be a string name of the treated unit. - If None, uses the first treated unit. + Parameters + ---------- + hdi_prob : float, default :data:`~causalpy.constants.HDI_PROB` + Probability mass of the highest density interval. Defaults to + the project-wide :data:`~causalpy.constants.HDI_PROB`. + treated_unit : str, optional + Which treated unit to extract data for. Must be a string name + of the treated unit. If ``None``, uses the first treated unit. """ if not isinstance(self.model, PyMCModel): raise ValueError("Unsupported model type") @@ -907,6 +925,9 @@ def effect_summary( Ignored for Synthetic Control (two-period design only). prefix : str, optional Prefix for prose generation. Defaults to "Post-period". + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/maketables_adapters.py b/causalpy/maketables_adapters.py index 33bf3b593..b05837c53 100644 --- a/causalpy/maketables_adapters.py +++ b/causalpy/maketables_adapters.py @@ -36,23 +36,55 @@ class MaketablesAdapter(Protocol): """Protocol for backend-specific maketables extraction.""" def coef_table(self, experiment: Any) -> pd.DataFrame: - """Return canonical coefficient table for maketables.""" + """Return canonical coefficient table for maketables. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... 
def stat(self, experiment: Any, key: str) -> Any: - """Return a single model-level statistic by key.""" + """Return a single model-level statistic by key. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + key : str + Statistic identifier (see :meth:`default_stat_keys`). + """ ... def vcov_info(self, experiment: Any) -> dict[str, Any]: - """Return variance-covariance metadata dict.""" + """Return variance-covariance metadata dict. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... def stat_labels(self, experiment: Any) -> dict[str, str] | None: - """Return display labels for statistics.""" + """Return display labels for statistics. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... def default_stat_keys(self, experiment: Any) -> list[str] | None: - """Return ordered list of default statistic keys.""" + """Return ordered list of default statistic keys. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... @@ -218,7 +250,13 @@ class PyMCMaketablesAdapter: """Adapter for experiments backed by PyMCModel.""" def coef_table(self, experiment: Any) -> pd.DataFrame: - """Build coefficient table from PyMC posterior draws with HDI intervals.""" + """Build coefficient table from PyMC posterior draws with HDI intervals. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ labels = list(getattr(experiment, "labels", [])) if not labels: msg = "Experiment has no coefficient labels for maketables export." @@ -242,7 +280,15 @@ def coef_table(self, experiment: Any) -> pd.DataFrame: ) def stat(self, experiment: Any, key: str) -> Any: - """Return a single Bayesian model-level statistic by key.""" + """Return a single Bayesian model-level statistic by key. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + key : str + Statistic identifier. 
+ """ stats: dict[str, Any] = { "N": _safe_observation_count(experiment), "r2": _safe_r2_value(experiment), @@ -253,15 +299,33 @@ def stat(self, experiment: Any, key: str) -> Any: return stats.get(key) def vcov_info(self, experiment: Any) -> dict[str, Any]: - """Return Bayesian posterior variance-covariance metadata.""" + """Return Bayesian posterior variance-covariance metadata. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ return {"se_type": "Bayesian posterior", "vcov": None} def stat_labels(self, experiment: Any) -> dict[str, str] | None: - """Return display labels for Bayesian model statistics.""" + """Return display labels for Bayesian model statistics. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ return {"N": "N", "r2": "Bayesian R2", "se_type": "SE type"} def default_stat_keys(self, experiment: Any) -> list[str] | None: - """Return ordered list of default statistic keys for Bayesian models.""" + """Return ordered list of default statistic keys for Bayesian models. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ keys = ["N"] if _safe_r2_value(experiment) is not None: keys.append("r2") @@ -272,7 +336,13 @@ class SklearnMaketablesAdapter: """Adapter for experiments backed by sklearn RegressorMixin.""" def coef_table(self, experiment: Any) -> pd.DataFrame: - """Build coefficient table from sklearn model coefficients.""" + """Build coefficient table from sklearn model coefficients. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ labels = list(getattr(experiment, "labels", [])) if not labels: msg = "Experiment has no coefficient labels for maketables export." 
@@ -291,7 +361,15 @@ def coef_table(self, experiment: Any) -> pd.DataFrame: return _canonical_frame(labels=labels, b=coeffs, se=nans, p=nans) def stat(self, experiment: Any, key: str) -> Any: - """Return a single OLS model-level statistic by key.""" + """Return a single OLS model-level statistic by key. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + key : str + Statistic identifier. + """ stats: dict[str, Any] = { "N": _safe_observation_count(experiment), "r2": _safe_r2_value(experiment), @@ -302,15 +380,33 @@ def stat(self, experiment: Any, key: str) -> Any: return stats.get(key) def vcov_info(self, experiment: Any) -> dict[str, Any]: - """Return OLS variance-covariance metadata.""" + """Return OLS variance-covariance metadata. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ return {"se_type": "Not available", "vcov": None} def stat_labels(self, experiment: Any) -> dict[str, str] | None: - """Return display labels for OLS model statistics.""" + """Return display labels for OLS model statistics. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ return {"N": "N", "r2": "R2", "se_type": "SE type"} def default_stat_keys(self, experiment: Any) -> list[str] | None: - """Return ordered list of default statistic keys for OLS models.""" + """Return ordered list of default statistic keys for OLS models. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ keys = ["N"] if _safe_r2_value(experiment) is not None: keys.append("r2") @@ -318,7 +414,13 @@ def default_stat_keys(self, experiment: Any) -> list[str] | None: def get_maketables_adapter(model: Any) -> MaketablesAdapter: - """Return the adapter for a model backend.""" + """Return the adapter for a model backend. + + Parameters + ---------- + model : Any + A PyMC or sklearn model instance. 
+ """ if isinstance(model, PyMCModel): return PyMCMaketablesAdapter() if isinstance(model, RegressorMixin): diff --git a/causalpy/pipeline.py b/causalpy/pipeline.py index 0ba0f2f7b..bc0bb499e 100644 --- a/causalpy/pipeline.py +++ b/causalpy/pipeline.py @@ -87,7 +87,19 @@ class PipelineResult: @classmethod def from_context(cls, context: PipelineContext) -> PipelineResult: - """Build a ``PipelineResult`` from a completed ``PipelineContext``.""" + """Build a ``PipelineResult`` from a completed ``PipelineContext``. + + Parameters + ---------- + context : PipelineContext + Completed pipeline context to extract user-facing results from. + + Returns + ------- + PipelineResult + Snapshot containing the experiment, effect summary, sensitivity + results, and report. + """ return cls( experiment=context.experiment, effect_summary=context.effect_summary, @@ -109,11 +121,28 @@ class Step(Protocol): """ def validate(self, context: PipelineContext) -> None: - """Check configuration before execution.""" + """Check configuration before execution. + + Parameters + ---------- + context : PipelineContext + Shared pipeline context. + """ ... def run(self, context: PipelineContext) -> PipelineContext: - """Execute the step, mutating and returning the context.""" + """Execute the step, mutating and returning the context. + + Parameters + ---------- + context : PipelineContext + Shared pipeline context, which the step is allowed to mutate. + + Returns + ------- + PipelineContext + The same context, returned for chaining convenience. + """ ... diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index 605c1a41d..276902153 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -75,6 +75,16 @@ class PyMCModel(pm.Model): methods like `fit`, `predict`, and `score`. It also provides other methods which are useful for causal inference. 
+ Parameters + ---------- + sample_kwargs : dict, optional + Dictionary of kwargs that get unpacked and passed to the + :func:`pymc.sample` function. Defaults to an empty dictionary if + ``None``. + priors : dict, optional + Dictionary of priors for the model. Defaults to ``None``, in which + case default priors are used. + Example ------- >>> import causalpy as cp @@ -334,10 +344,23 @@ def predict( **kwargs, ): """ - Predict data given input data `X` + Predict data given input data `X`. .. caution:: Results in KeyError if model hasn't been fit. + + Parameters + ---------- + X : xr.DataArray + Input features for which predictions are required. + coords : dict, optional + Coordinate names for named dimensions. Forwarded to subclass + ``_data_setter`` overrides; ignored by the base implementation. + out_of_sample : bool, optional + Marker for out-of-sample prediction. Reserved for subclasses; + the base implementation does not act on it. + **kwargs + Reserved for subclass extensions. """ # Ensure random_seed is used in sample_prior_predictive() and @@ -376,6 +399,17 @@ def score(self, X, y, coords: dict[str, Any] | None = None, **kwargs) -> pd.Seri The Bayesian :math:`R^2` is not the same as the traditional coefficient of determination, https://en.wikipedia.org/wiki/Coefficient_of_determination. + Parameters + ---------- + X : xr.DataArray + Input features. + y : xr.DataArray + Observed targets to score against the posterior predictive mean. + coords : dict, optional + Coordinate names for named dimensions. Accepted for API + compatibility; the base implementation does not pass it on. + **kwargs + Reserved for subclass extensions. 
""" mu = self.predict(X) mu_data = az.extract(mu, group="posterior_predictive", var_names="mu") @@ -467,30 +501,30 @@ def print_coefficients( if self.idata is None: raise RuntimeError("Model has not been fit") - def print_row( + def _print_row( max_label_length: int, name: str, coeff_samples: xr.DataArray, round_to: int ) -> None: - """Print one row of the coefficient table""" + """Print one row of the coefficient table.""" formatted_name = f" {name: <{max_label_length}}" formatted_val = f"{round_num(coeff_samples.mean().data, round_to)}, {HDI_PROB * 100:.0f}% HDI [{round_num(coeff_samples.quantile((1 - HDI_PROB) / 2).data, round_to)}, {round_num(coeff_samples.quantile(1 - (1 - HDI_PROB) / 2).data, round_to)}]" # noqa: E501 print(f" {formatted_name} {formatted_val}") - def print_coefficients_for_unit( + def _print_coefficients_for_unit( unit_coeffs: xr.DataArray, unit_sigma: xr.DataArray, labels: list, round_to: int, ) -> None: - """Print coefficients for a single unit""" + """Print coefficients for a single unit.""" # Determine the width of the longest label max_label_length = max(len(name) for name in labels + ["y_hat_sigma"]) for name in labels: coeff_samples = unit_coeffs.sel(coeffs=name) - print_row(max_label_length, name, coeff_samples, round_to) + _print_row(max_label_length, name, coeff_samples, round_to) # Add coefficient for measurement std - print_row(max_label_length, "y_hat_sigma", unit_sigma, round_to) + _print_row(max_label_length, "y_hat_sigma", unit_sigma, round_to) print("Model coefficients:") coeffs = az.extract(self.idata.posterior, var_names="beta") @@ -515,7 +549,7 @@ def print_coefficients_for_unit( unit_sigma = az.extract(self.idata.posterior, var_names=sigma_var_name).sel( treated_units=unit ) - print_coefficients_for_unit(unit_coeffs, unit_sigma, labels, round_to or 2) + _print_coefficients_for_unit(unit_coeffs, unit_sigma, labels, round_to or 2) class LinearRegression(PyMCModel): @@ -568,7 +602,16 @@ def build_model( self, X: 
xr.DataArray, y: xr.DataArray, coords: dict[str, Any] | None ) -> None: """ - Defines the PyMC model + Define the PyMC model. + + Parameters + ---------- + X : xr.DataArray + Design matrix with dims ``("obs_ind", "coeffs")``. + y : xr.DataArray + Outcome with dims ``("obs_ind", "treated_units")``. + coords : dict or None + Coordinate names for the model's named dimensions. """ with self: # Ensure treated_units coordinate exists for consistency @@ -668,7 +711,16 @@ def build_model( self, X: xr.DataArray, y: xr.DataArray, coords: dict[str, Any] | None ) -> None: """ - Defines the PyMC model + Define the PyMC model. + + Parameters + ---------- + X : xr.DataArray + Design matrix with dims ``("obs_ind", "coeffs")``. + y : xr.DataArray + Outcome with dims ``("obs_ind", "treated_units")``. + coords : dict or None + Coordinate names for the model's named dimensions. """ with self: self.add_coords(coords) @@ -866,6 +918,15 @@ def build_model( ) -> None: """ Build the PyMC model with softmax-parameterized simplex weights. + + Parameters + ---------- + X : xr.DataArray + Design matrix with dims ``("obs_ind", "coeffs")``. + y : xr.DataArray + Outcome with dims ``("obs_ind", "treated_units")``. + coords : dict or None + Coordinate names for the model's named dimensions. """ if not coords or "coeffs" not in coords: raise ValueError( @@ -973,13 +1034,15 @@ def build_model( # type: ignore Dictionary of priors for the mus and sigmas of both regressions. Example: ``priors = {"mus": [0, 0], "sigmas": [1, 1], "eta": 2, "lkj_sd": 2}``. - vs_prior_type: An optional string. Can be "spike_and_slab" - or "horseshoe" or "normal - vs_hyperparams: An optional dictionary of priors for the - variable selection hyperparameters - binary_treatment: A flag for determining the relevant - likelihood to be used. - + vs_prior_type : {"spike_and_slab", "horseshoe", "normal"}, optional + Optional variable-selection prior type. ``None`` falls back to + standard normal priors. 
+ vs_hyperparams : dict, optional + Hyperparameters for the variable-selection prior. Only consulted + when ``vs_prior_type`` is set. + binary_treatment : bool, default False + Whether the treatment ``t`` is binary; selects the relevant + likelihood term. """ # --- Priors --- @@ -1117,7 +1180,15 @@ def sample_predictive_distribution(self, ppc_sampler: str | None = "jax") -> Non using the JAX sampler compilation method. If using the JAX sampler it will sample only the posterior predictive distribution. If using the PYMC sampler if will sample both the prior - and posterior predictive distributions.""" + and posterior predictive distributions. + + Parameters + ---------- + ppc_sampler : {"jax", "pymc"}, optional + Backend used for posterior predictive sampling. ``"jax"`` (the + default) is much faster for the multivariate Normal likelihood; + ``"pymc"`` additionally samples the prior predictive. + """ random_seed = self.sample_kwargs.get("random_seed", None) if ppc_sampler == "jax": @@ -1160,6 +1231,29 @@ def fit( # type: ignore[override] We default to None, so the user can determine if they wish to spend time sampling the posterior predictive distribution independently. + + Parameters + ---------- + X : np.ndarray + Array used to predict the outcome ``y``. + Z : np.ndarray + Array used to predict the treatment variable ``t``. + y : np.ndarray + Focal outcome. + t : np.ndarray + Treatment whose causal impact is being estimated. + coords : dict + Coordinate names for the instruments and covariates. + priors : dict + Prior specification dictionary forwarded to :meth:`build_model`. + ppc_sampler : {"jax", "pymc"}, optional + Backend for posterior predictive sampling. ``None`` skips it. + vs_prior_type : {"spike_and_slab", "horseshoe", "normal"}, optional + Variable-selection prior type, forwarded to :meth:`build_model`. + vs_hyperparams : dict, optional + Hyperparameters for the variable-selection prior. 
+ binary_treatment : bool, default False + Whether the treatment ``t`` is binary. """ # Ensure random_seed is used in sample_prior_predictive() and @@ -1222,7 +1316,23 @@ def build_model( # type: ignore prior: dict[str, Any] | None = None, noncentred: bool = True, ) -> None: - "Defines the PyMC propensity model" + """Define the PyMC propensity model. + + Parameters + ---------- + X : np.ndarray + Covariate matrix used to predict the treatment. + t : np.ndarray + Observed treatment indicator (0/1). + coords : dict + Coordinate names for named dimensions of the model. + prior : dict, optional + Prior specification overrides; see :attr:`default_priors` for + the expected keys. + noncentred : bool, default True + Reserved for future non-centred parameterisations of the + coefficient prior. Currently informational only. + """ with self: self.add_coords(coords) X_data = pm.Data("X", X, dims=["obs_ind", "coeffs"]) @@ -1243,6 +1353,19 @@ def fit( # type: ignore """Draw samples from posterior, prior predictive, and posterior predictive distributions. We overwrite the base method because the base method assumes a variable y and we use t to indicate the treatment variable here. + + Parameters + ---------- + X : np.ndarray + Covariate matrix used to predict the treatment. + t : np.ndarray + Observed treatment indicator (0/1). + coords : dict + Coordinate names for named dimensions of the model. + prior : dict, optional + Prior specification overrides. Defaults to ``{"b": [0, 1]}``. + noncentred : bool, default True + Forwarded to :meth:`build_model`. 
""" if prior is None: prior = {"b": [0, 1]} @@ -1314,8 +1437,8 @@ def fit_outcome_model( If we wish to winsorize the propensity score this can be set to clip the high and low values of the propensity at 0 + winsorize_boundary and 1-winsorize_boundary - spline_knots: int, default 30 - The number of knots we use in the 0 - 1 interval to create our spline function + spline_knots : int, default 30 + The number of knots we use in the 0 - 1 interval to create our spline function. Returns ------- @@ -1855,6 +1978,9 @@ def predict( Not used, kept for API compatibility. out_of_sample : bool, optional Not used, kept for API compatibility. + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- @@ -1896,6 +2022,8 @@ def score( Target variable with dims ["obs_ind", "treated_units"]. coords : dict, optional Not used, kept for API compatibility. + **kwargs + Forwarded to :meth:`PyMCModel.score`. Returns ------- @@ -2228,6 +2356,9 @@ def predict( Not used directly, datetime extracted from X coordinates. out_of_sample : bool, optional If True, forecast future values. If False, return in-sample predictions. + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- @@ -2299,6 +2430,8 @@ def score( Target variable with dims ["obs_ind", "treated_units"]. coords : dict, optional Not used, kept for API compatibility. + **kwargs + Forwarded to :meth:`PyMCModel.score`. Returns ------- diff --git a/causalpy/skl_models.py b/causalpy/skl_models.py index 7a144e019..a76e4e5a3 100644 --- a/causalpy/skl_models.py +++ b/causalpy/skl_models.py @@ -29,11 +29,25 @@ class ScikitLearnAdaptor: coef_: np.ndarray def calculate_impact(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: - """Calculate the causal impact of the intervention.""" + """Calculate the causal impact of the intervention. + + Parameters + ---------- + y_true : np.ndarray + Observed outcomes. 
+ y_pred : np.ndarray + Counterfactual predictions from the fitted model. + """ return y_true - y_pred def calculate_cumulative_impact(self, impact: np.ndarray) -> np.ndarray: - """Calculate the cumulative impact intervention.""" + """Calculate the cumulative impact of the intervention. + + Parameters + ---------- + impact : np.ndarray + Per-period impact estimates. + """ return np.cumsum(impact) def print_coefficients( @@ -73,11 +87,29 @@ class WeightedProportion(ScikitLearnAdaptor, LinearModel, RegressorMixin): methods for example""" def loss(self, W: np.ndarray, X: np.ndarray, y: np.ndarray) -> float: - """Compute root mean squared loss with data X, weights W, and predictor y""" + """Compute root mean squared loss with data X, weights W, and target y. + + Parameters + ---------- + W : np.ndarray + Convex combination weights. + X : np.ndarray + Donor matrix. + y : np.ndarray + Treated unit outcomes in the pre-treatment period. + """ return np.sqrt(np.mean((y - np.dot(X, W.T)) ** 2)) def fit(self, X: np.ndarray, y: np.ndarray) -> "WeightedProportion": - """Fit model on data X with predictor y""" + """Fit model on data X with target y. + + Parameters + ---------- + X : np.ndarray + Donor matrix. + y : np.ndarray + Treated unit outcomes in the pre-treatment period. + """ w_start = [1 / X.shape[1]] * X.shape[1] coef_ = fmin_slsqp( partial(self.loss, X=X, y=y), @@ -91,7 +123,13 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "WeightedProportion": return self def predict(self, X: np.ndarray) -> np.ndarray: - """Predict results for data X""" + """Predict results for data X. + + Parameters + ---------- + X : np.ndarray + Donor matrix to predict from. + """ return np.dot(X, self.coef_.T) @@ -99,7 +137,13 @@ def create_causalpy_compatible_class( estimator: type[RegressorMixin], ) -> type[RegressorMixin]: """This function takes a scikit-learn estimator and returns a new class that is - compatible with CausalPy.""" + compatible with CausalPy. 
+ + Parameters + ---------- + estimator : type[RegressorMixin] + A scikit-learn estimator class to augment. + """ _add_mixin_methods(estimator, ScikitLearnAdaptor) return estimator diff --git a/causalpy/steps/estimate_effect.py b/causalpy/steps/estimate_effect.py index b83a83bcb..b0565c5af 100644 --- a/causalpy/steps/estimate_effect.py +++ b/causalpy/steps/estimate_effect.py @@ -63,6 +63,12 @@ def __init__(self, method: type[BaseExperiment], **kwargs: Any) -> None: def validate(self, context: PipelineContext) -> None: """Check that the step is properly configured. + Parameters + ---------- + context : PipelineContext + Pipeline context (unused at validation time but required by the + pipeline step interface). + Raises ------ TypeError @@ -87,6 +93,12 @@ def run(self, context: PipelineContext) -> PipelineContext: The experiment constructor receives ``context.data`` as its first positional argument, followed by all captured keyword arguments. + Parameters + ---------- + context : PipelineContext + Pipeline context. ``context.data`` is forwarded to the experiment + constructor as the first positional argument. + Returns ------- PipelineContext diff --git a/causalpy/steps/report.py b/causalpy/steps/report.py index 770986cd8..3c01a2c73 100644 --- a/causalpy/steps/report.py +++ b/causalpy/steps/report.py @@ -71,7 +71,13 @@ def __init__( def validate(self, context: PipelineContext) -> None: """GenerateReport has no strict prerequisites; it gracefully handles - missing data.""" + missing data. + + Parameters + ---------- + context : PipelineContext + Pipeline context (unused; required by the step interface). 
+ """ def _render_plot(self, experiment: Any) -> list[str]: """Render experiment plots as base64-encoded PNG strings.""" @@ -90,7 +96,19 @@ def _render_plot(self, experiment: Any) -> list[str]: return plots def run(self, context: PipelineContext) -> PipelineContext: - """Generate the HTML report and store it in the context.""" + """Generate the HTML report and store it in the context. + + Parameters + ---------- + context : PipelineContext + Pipeline context providing ``experiment``, ``effect_summary``, + and ``sensitivity_results`` (any of which may be ``None``). + + Returns + ------- + PipelineContext + The same context with ``report`` populated. + """ env = Environment( loader=FileSystemLoader(str(_TEMPLATE_DIR)), autoescape=True, diff --git a/causalpy/steps/sensitivity.py b/causalpy/steps/sensitivity.py index 644d33aba..5a65bc92f 100644 --- a/causalpy/steps/sensitivity.py +++ b/causalpy/steps/sensitivity.py @@ -43,6 +43,14 @@ def register_default_check( Called by check modules at import time so that ``SensitivityAnalysis.default_for`` can auto-select checks. + + Parameters + ---------- + check_class : type + The check class to register as a default. + experiment_types : set of type[BaseExperiment] + Experiment classes for which ``check_class`` should be applied by + default. """ for exp_type in experiment_types: _DEFAULT_CHECKS.setdefault(exp_type, []).append(check_class) @@ -69,7 +77,18 @@ class SensitivitySummary: @classmethod def from_results(cls, results: list[CheckResult]) -> SensitivitySummary: - """Build a summary from a list of check results.""" + """Build a summary from a list of check results. + + Parameters + ---------- + results : list of CheckResult + Individual results to aggregate. + + Returns + ------- + SensitivitySummary + Aggregated summary covering all supplied results. 
+ """ verdicts = [r.passed for r in results if r.passed is not None] all_passed = all(verdicts) if verdicts else None @@ -127,6 +146,12 @@ def validate(self, context: PipelineContext) -> None: only check structural issues (e.g. that each object satisfies the Check protocol). + Parameters + ---------- + context : PipelineContext + Pipeline context (unused at validation time but required by the + pipeline step interface). + Raises ------ TypeError @@ -142,6 +167,18 @@ def validate(self, context: PipelineContext) -> None: def run(self, context: PipelineContext) -> PipelineContext: """Run all checks against the fitted experiment. + Parameters + ---------- + context : PipelineContext + Pipeline context containing the fitted experiment and any + ``experiment_config`` required by the checks. + + Returns + ------- + PipelineContext + The same context with ``sensitivity_results`` and ``report`` + populated. + Raises ------ RuntimeError diff --git a/causalpy/transforms.py b/causalpy/transforms.py index 2b8ff2245..42792d85e 100644 --- a/causalpy/transforms.py +++ b/causalpy/transforms.py @@ -53,13 +53,11 @@ class StepTransform: from the training data, ensuring consistent behavior when predicting on new data. - Parameters - ---------- - x : array-like - Time values (numeric or datetime) - threshold : numeric, str, or pd.Timestamp - The intervention time. For datetime x, can be a string like - '2020-01-01' which will be parsed as pd.Timestamp. + Notes + ----- + Per the patsy stateful transform protocol, ``x`` and ``threshold`` are + supplied to :meth:`memorize_chunk` and :meth:`transform` rather than to + the constructor; see those methods for parameter details. Examples -------- @@ -88,7 +86,17 @@ def _is_datetime_like(self, x: Any) -> bool: def memorize_chunk( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> None: - """Called during first pass - detect datetime and store origin.""" + """ + Detect datetime and store origin during patsy's first pass. 
+ + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time. For datetime ``x`` it may be a string + like ``'2020-01-01'`` or a :class:`pd.Timestamp`. + """ if self._is_datetime_like(x): self._is_datetime = True x_dt = pd.to_datetime(x) @@ -106,7 +114,21 @@ def memorize_finish(self) -> None: def transform( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> np.ndarray: - """Transform x into step function values.""" + """ + Transform ``x`` into step function values. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time, in the same domain as ``x``. + + Returns + ------- + np.ndarray + Binary indicator with 1 where ``x >= threshold`` and 0 elsewhere. + """ if self._is_datetime and self._origin is not None: # Convert x to days from origin x_dt = pd.to_datetime(x) @@ -151,12 +173,14 @@ class RampTransform: the threshold can be specified as a string ('2020-01-01') or pd.Timestamp. - Parameters - ---------- - x : array-like - Time values (numeric or datetime) - threshold : numeric, str, or pd.Timestamp - The intervention time. + Notes + ----- + Per the patsy stateful transform protocol, ``x`` and ``threshold`` are + supplied to :meth:`memorize_chunk` and :meth:`transform` rather than to + the constructor; see those methods for parameter details. + + For datetime inputs, the ramp values represent days since the threshold. + This means the slope coefficient will be interpreted as "change per day". Examples -------- @@ -165,11 +189,6 @@ class RampTransform: >>> # Datetime time - ramp is in DAYS >>> formula = "y ~ 1 + date + ramp(date, '2020-06-01')" - - Notes - ----- - For datetime inputs, the ramp values represent days since the threshold. - This means the slope coefficient will be interpreted as "change per day". 
""" def __init__(self) -> None: @@ -187,7 +206,17 @@ def _is_datetime_like(self, x: Any) -> bool: def memorize_chunk( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> None: - """Called during first pass - detect datetime and store origin.""" + """ + Detect datetime and store origin during patsy's first pass. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time. For datetime ``x`` it may be a string + like ``'2020-01-01'`` or a :class:`pd.Timestamp`. + """ if self._is_datetime_like(x): self._is_datetime = True x_dt = pd.to_datetime(x) @@ -204,7 +233,22 @@ def memorize_finish(self) -> None: def transform( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> np.ndarray: - """Transform x into ramp function values.""" + """ + Transform ``x`` into ramp function values. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time, in the same domain as ``x``. + + Returns + ------- + np.ndarray + Ramp values ``max(0, x - threshold)``. For datetime inputs, the + difference is expressed in days. + """ if self._is_datetime and self._origin is not None: # Convert x to days from origin x_dt = pd.to_datetime(x) diff --git a/causalpy/utils.py b/causalpy/utils.py index 39b886d66..17f1326e4 100644 --- a/causalpy/utils.py +++ b/causalpy/utils.py @@ -382,11 +382,6 @@ def extract_lift_for_mmm( If the model is not a Bayesian (PyMC) model, as uncertainty quantification requires posterior samples. - See Also - -------- - PyMC-Marketing lift test calibration : - https://www.pymc-marketing.io/en/stable/notebooks/mmm/mmm_lift_test.html - Notes ----- This function is designed for integration with PyMC-Marketing's MMM calibration @@ -395,7 +390,9 @@ def extract_lift_for_mmm( with experimental evidence. 
For more information on lift test calibration in MMMs, see the PyMC-Marketing - documentation: https://github.com/pymc-labs/pymc-marketing + documentation: https://github.com/pymc-labs/pymc-marketing. + Reference workflow: + https://www.pymc-marketing.io/en/stable/notebooks/mmm/mmm_lift_test.html Examples -------- diff --git a/pyproject.toml b/pyproject.toml index d24553b17..00f6d50a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -164,20 +164,18 @@ extend-select = [ ignore-words = "./docs/source/.codespell/codespell-whitelist.txt" skip = "*.ipynb,*.csv,*.svg,pyproject.toml,docs/source/.codespell/codespell-whitelist.txt" -# Numpydoc validation, intentionally narrow. -# Used by the ``numpydoc-validation`` pre-commit hook to enforce that every -# public ``plot()`` method on a :class:`BaseExperiment` subclass keeps its -# numpydoc-style ``Parameters`` block in sync with the function signature -# (issue #886). The exclude regex is a negative lookahead that skips every -# node whose dotted name does *not* end in ``.plot``. The base class -# deliberately offers no public ``plot()`` (the shared dispatcher lives in -# the protected helper ``_render_plot``), so no carve-out is required. +# Numpydoc validation. Originally introduced under #886 as a narrow check on +# public ``.plot`` overrides, expanded under #898 to enforce a curated set of +# numpydoc rules across the whole ``causalpy`` package. The ``exclude`` regex +# matches any dotted node whose path contains a private (underscore-prefixed) +# component — private helpers, dunders, and members of private modules are +# considered implementation detail and not subject to docstring validation. [tool.numpydoc_validation] checks = [ "PR01", # Parameters not documented "PR02", # Unknown parameters ] -exclude = ['^(?!.*\.plot$).*$'] +exclude = ['\._'] [tool.coverage.run] source = ["causalpy"] From a66d0ecd7aa9c5d34c74cff75961d00582304c34 Mon Sep 17 00:00:00 2001 From: "Benjamin T. 
Vincent" Date: Fri, 1 May 2026 20:57:05 +0100 Subject: [PATCH 2/8] Enable PR04 (parameter has no type) numpydoc check (#898) Add ``PR04`` to the numpydoc-validation check set so parameter rows missing a type are caught going forward. No code changes are required: every documented parameter under the package-wide scope already declares a type. Co-authored-by: Cursor --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 00f6d50a1..99eb80e16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,6 +174,7 @@ skip = "*.ipynb,*.csv,*.svg,pyproject.toml,docs/source/.codespell/codespell-whit checks = [ "PR01", # Parameters not documented "PR02", # Unknown parameters + "PR04", # Parameter has no type ] exclude = ['\._'] From 9aaf645a5f493073e170b68ffaf22d1dc998020a Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Fri, 1 May 2026 20:57:22 +0100 Subject: [PATCH 3/8] Enable PR07 (parameter has no description) numpydoc check (#898) Add ``PR07`` to the numpydoc-validation check set so undocumented parameter rows are caught going forward. No code changes are required: every parameter row under the package-wide scope already carries a description. Co-authored-by: Cursor --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 99eb80e16..a771ed6e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,6 +175,7 @@ checks = [ "PR01", # Parameters not documented "PR02", # Unknown parameters "PR04", # Parameter has no type + "PR07", # Parameter has no description ] exclude = ['\._'] From dffe82851f69f7dbc5c6b0c2499e7681237b91ee Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Fri, 1 May 2026 20:57:40 +0100 Subject: [PATCH 4/8] Enable PR10 (space before colon) numpydoc check (#898) Add ``PR10`` to the numpydoc-validation check set so parameter rows that omit a space before the type colon are caught going forward. 
No code changes are required: existing docstrings in the broadened scope already conform. Co-authored-by: Cursor --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a771ed6e3..7b2c72d1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -176,6 +176,7 @@ checks = [ "PR02", # Unknown parameters "PR04", # Parameter has no type "PR07", # Parameter has no description + "PR10", # Parameter requires a space before the colon ] exclude = ['\._'] From 1bc0d5eb9e9f602b619a841a66dee0cf97771290 Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Fri, 1 May 2026 20:58:08 +0100 Subject: [PATCH 5/8] Enable RT03 (return value has no description) numpydoc check (#898) Add ``RT03`` to the numpydoc-validation check set so undocumented return rows are caught going forward, and flesh out the bare ``CheckResult`` return entry on the ``Check`` protocol's ``run`` method (the only violation surfaced under the package-wide scope). Co-authored-by: Cursor --- causalpy/checks/base.py | 2 ++ pyproject.toml | 1 + 2 files changed, 3 insertions(+) diff --git a/causalpy/checks/base.py b/causalpy/checks/base.py index 5c238a2cc..dab08f936 100644 --- a/causalpy/checks/base.py +++ b/causalpy/checks/base.py @@ -129,5 +129,7 @@ def run( Returns ------- CheckResult + Outcome of the check, including pass/fail status and any + diagnostic payload produced by the implementation. """ ... diff --git a/pyproject.toml b/pyproject.toml index 7b2c72d1b..36809086a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -177,6 +177,7 @@ checks = [ "PR04", # Parameter has no type "PR07", # Parameter has no description "PR10", # Parameter requires a space before the colon + "RT03", # Return value has no description ] exclude = ['\._'] From 2636939952288fd8297c9ef182f769aa92b39ab3 Mon Sep 17 00:00:00 2001 From: "Benjamin T. 
Vincent" Date: Fri, 1 May 2026 21:01:06 +0100 Subject: [PATCH 6/8] Enable SS03 (summary ends with period) numpydoc check (#898) Add ``SS03`` to the numpydoc-validation check set so first-line summaries that omit a trailing period are caught going forward, and update every docstring under the package-wide scope that previously violated the rule (mostly module-level docstrings and a handful of method summaries). Co-authored-by: Cursor --- causalpy/experiments/__init__.py | 2 +- causalpy/experiments/diff_in_diff.py | 6 ++---- causalpy/experiments/instrumental_variable.py | 12 ++++-------- causalpy/experiments/interrupted_time_series.py | 4 +--- causalpy/experiments/inverse_propensity_weighting.py | 4 +--- causalpy/experiments/panel_regression.py | 4 +--- causalpy/experiments/piecewise_its.py | 4 +--- causalpy/experiments/prepostnegd.py | 6 ++---- causalpy/experiments/regression_discontinuity.py | 6 ++---- causalpy/experiments/regression_kink.py | 6 ++---- causalpy/experiments/staggered_did.py | 3 +-- causalpy/experiments/synthetic_control.py | 4 +--- causalpy/pymc_models.py | 7 +++---- causalpy/skl_models.py | 8 +++++--- causalpy/utils.py | 4 +--- causalpy/version.py | 2 +- pyproject.toml | 1 + 17 files changed, 30 insertions(+), 53 deletions(-) diff --git a/causalpy/experiments/__init__.py b/causalpy/experiments/__init__.py index 66b98eda1..b73902289 100644 --- a/causalpy/experiments/__init__.py +++ b/causalpy/experiments/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""CausalPy experiment module""" +"""CausalPy experiment module.""" from .diff_in_diff import DifferenceInDifferences from .instrumental_variable import InstrumentalVariable diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py index 350cfba22..dffdbca88 100644 --- a/causalpy/experiments/diff_in_diff.py +++ b/causalpy/experiments/diff_in_diff.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Difference in differences -""" +"""Difference in differences.""" from typing import Any, Literal @@ -263,7 +261,7 @@ def algorithm(self) -> None: raise ValueError("Model type not recognized") def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" # Validate formula structure and interaction interaction terms self._validate_formula_interaction_terms() # Check if post_treatment_variable_name is in formula diff --git a/causalpy/experiments/instrumental_variable.py b/causalpy/experiments/instrumental_variable.py index 67bc0faa3..76ccde8ce 100644 --- a/causalpy/experiments/instrumental_variable.py +++ b/causalpy/experiments/instrumental_variable.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" -Instrumental variable regression -""" +"""Instrumental variable regression.""" import warnings # noqa: I001 @@ -203,7 +201,7 @@ def algorithm(self) -> None: ) def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" treatment = self.instruments_formula.split("~")[0] test = treatment.strip() in self.instruments_data.columns test = test & (treatment.strip() in self.data.columns) @@ -226,8 +224,7 @@ def input_validation(self) -> None: ) def get_2SLS_fit(self) -> None: - """ - Two Stage Least Squares Fit + """Two Stage Least Squares Fit. This function is called by the experiment, results are used for priors if none are provided. @@ -248,8 +245,7 @@ def get_2SLS_fit(self) -> None: self.second_stage_reg = second_stage_reg def get_naive_OLS_fit(self) -> None: - """ - Naive Ordinary Least Squares + """Naive Ordinary Least Squares. This function is called by the experiment. """ diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py index 111a2e83d..c8d4f961b 100644 --- a/causalpy/experiments/interrupted_time_series.py +++ b/causalpy/experiments/interrupted_time_series.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Interrupted Time Series Analysis -""" +"""Interrupted Time Series Analysis.""" from typing import Any, Literal diff --git a/causalpy/experiments/inverse_propensity_weighting.py b/causalpy/experiments/inverse_propensity_weighting.py index 107c17f67..06aa70c54 100644 --- a/causalpy/experiments/inverse_propensity_weighting.py +++ b/causalpy/experiments/inverse_propensity_weighting.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" -Inverse propensity weighting -""" +"""Inverse propensity weighting.""" import warnings from typing import Any, Literal diff --git a/causalpy/experiments/panel_regression.py b/causalpy/experiments/panel_regression.py index 815f37fa1..5a7fa07b1 100644 --- a/causalpy/experiments/panel_regression.py +++ b/causalpy/experiments/panel_regression.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Panel Regression with Fixed Effects -""" +"""Panel Regression with Fixed Effects.""" from typing import Any, Literal diff --git a/causalpy/experiments/piecewise_its.py b/causalpy/experiments/piecewise_its.py index 724c38efe..02e1e1e1a 100644 --- a/causalpy/experiments/piecewise_its.py +++ b/causalpy/experiments/piecewise_its.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Piecewise Interrupted Time Series Analysis (Segmented Regression) -""" +"""Piecewise Interrupted Time Series Analysis (Segmented Regression).""" import re from typing import Any, Literal diff --git a/causalpy/experiments/prepostnegd.py b/causalpy/experiments/prepostnegd.py index ece660c06..8bbc808cf 100644 --- a/causalpy/experiments/prepostnegd.py +++ b/causalpy/experiments/prepostnegd.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" -Pretest/posttest nonequivalent group design -""" +"""Pretest/posttest nonequivalent group design.""" from typing import Any, Literal @@ -193,7 +191,7 @@ def algorithm(self) -> None: ) def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" if not _is_variable_dummy_coded(self.data[self.group_variable_name]): raise DataException( f""" diff --git a/causalpy/experiments/regression_discontinuity.py b/causalpy/experiments/regression_discontinuity.py index 17e28d658..56fd157a8 100644 --- a/causalpy/experiments/regression_discontinuity.py +++ b/causalpy/experiments/regression_discontinuity.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Regression discontinuity design -""" +"""Regression discontinuity design.""" import warnings # noqa: I001 from typing import Any, Literal @@ -245,7 +243,7 @@ def algorithm(self) -> None: # ****************************************************************************** def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" if "treated" not in self.formula: raise FormulaException( "A predictor called `treated` should be in the formula" diff --git a/causalpy/experiments/regression_kink.py b/causalpy/experiments/regression_kink.py index d107805ec..7b5e47634 100644 --- a/causalpy/experiments/regression_kink.py +++ b/causalpy/experiments/regression_kink.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-""" -Regression kink design -""" +"""Regression kink design.""" import warnings # noqa: I001 @@ -167,7 +165,7 @@ def algorithm(self) -> None: ) def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" if "treated" not in self.formula: raise FormulaException( "A predictor called `treated` should be in the formula" diff --git a/causalpy/experiments/staggered_did.py b/causalpy/experiments/staggered_did.py index 06b047143..c66a79bc7 100644 --- a/causalpy/experiments/staggered_did.py +++ b/causalpy/experiments/staggered_did.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Staggered Difference in Differences (Imputation-based) +"""Staggered Difference in Differences (Imputation-based). This module implements the imputation-based staggered DiD estimator, following the approach of Borusyak, Jaravel, and Spiess (2024). It handles settings where diff --git a/causalpy/experiments/synthetic_control.py b/causalpy/experiments/synthetic_control.py index a69e2e5b4..2937b9520 100644 --- a/causalpy/experiments/synthetic_control.py +++ b/causalpy/experiments/synthetic_control.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Synthetic Control Experiment -""" +"""Synthetic Control Experiment.""" import warnings from typing import Any, Literal diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index 276902153..5f49f420c 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""Custom PyMC models for causal inference""" +"""Custom PyMC models for causal inference.""" import inspect import warnings @@ -956,7 +956,7 @@ def build_model( class InstrumentalVariableRegression(PyMCModel): - """Custom PyMC model for instrumental linear regression + """Custom PyMC model for instrumental linear regression. Example -------- @@ -1270,8 +1270,7 @@ def fit( # type: ignore[override] class PropensityScore(PyMCModel): - r""" - Custom PyMC model for inverse propensity score models + r"""Custom PyMC model for inverse propensity score models. .. note: Generally, the `.fit()` method should be used rather than diff --git a/causalpy/skl_models.py b/causalpy/skl_models.py index a76e4e5a3..b99ee5e3e 100644 --- a/causalpy/skl_models.py +++ b/causalpy/skl_models.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Custom scikit-learn models for causal inference""" +"""Custom scikit-learn models for causal inference.""" from functools import partial @@ -83,8 +83,10 @@ def get_coeffs(self) -> np.ndarray: class WeightedProportion(ScikitLearnAdaptor, LinearModel, RegressorMixin): - """Weighted proportion model for causal inference. Used for synthetic control - methods for example""" + """Weighted proportion model for causal inference. + + Used for synthetic control methods, for example. + """ def loss(self, W: np.ndarray, X: np.ndarray, y: np.ndarray) -> float: """Compute root mean squared loss with data X, weights W, and predictor y. diff --git a/causalpy/utils.py b/causalpy/utils.py index 17f1326e4..6eeeb4d84 100644 --- a/causalpy/utils.py +++ b/causalpy/utils.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -""" -Utility functions -""" +"""Utility functions.""" from __future__ import annotations diff --git a/causalpy/version.py b/causalpy/version.py index e721edb61..ca088be3d 100644 --- a/causalpy/version.py +++ b/causalpy/version.py @@ -11,6 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""CausalPy Version""" +"""CausalPy Version.""" __version__ = "0.8.0" diff --git a/pyproject.toml b/pyproject.toml index 36809086a..8436691ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -178,6 +178,7 @@ checks = [ "PR07", # Parameter has no description "PR10", # Parameter requires a space before the colon "RT03", # Return value has no description + "SS03", # Summary does not end with a period ] exclude = ['\._'] From 9c75bc0284c9a218e41998681187209330352406 Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Fri, 1 May 2026 21:03:24 +0100 Subject: [PATCH 7/8] Enable GL07 (section ordering) numpydoc check (#898) Add ``GL07`` to the numpydoc-validation check set so out-of-order docstring sections are caught going forward. Reorder ``Examples``/``Notes``/``See Also`` blocks in ``InterruptedTimeSeries``, ``PiecewiseITS``, ``PyMCModel.priors_from_data``, and the variable-selection-prior classes (the latter previously used a non-standard singular ``Example`` heading), all to match numpydoc's canonical ordering. 
Co-authored-by: Cursor --- .../experiments/interrupted_time_series.py | 38 +++++++++---------- causalpy/experiments/piecewise_its.py | 34 ++++++++--------- causalpy/pymc_models.py | 10 ++--- causalpy/variable_selection_priors.py | 16 ++++---- pyproject.toml | 1 + 5 files changed, 50 insertions(+), 49 deletions(-) diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py index c8d4f961b..149c81ede 100644 --- a/causalpy/experiments/interrupted_time_series.py +++ b/causalpy/experiments/interrupted_time_series.py @@ -67,6 +67,25 @@ class InterruptedTimeSeries(BaseExperiment): **kwargs : dict Additional keyword arguments passed to the model. + Notes + ----- + For Bayesian models, the causal impact is calculated using the posterior expectation + (``mu``) rather than the posterior predictive (``y_hat``). This means the impact and + its uncertainty represent the systematic causal effect, excluding observation-level + noise. The uncertainty bands in the plots reflect parameter uncertainty and + counterfactual prediction uncertainty, but not individual observation variability. + + The three-period design is useful for analyzing temporary interventions such as: + + - Marketing campaigns with defined start and end dates + - Policy trials or pilot programs + - Clinical treatments with limited duration + - Seasonal interventions + + Use ``effect_summary(period="intervention")`` to analyze effects during the + intervention, and ``effect_summary(period="post")`` to analyze effect persistence + after the intervention ends. 
+ Examples -------- **Two-period design (permanent intervention):** @@ -103,25 +122,6 @@ class InterruptedTimeSeries(BaseExperiment): >>> # Get period-specific effect summaries >>> intervention_summary = result.effect_summary(period="intervention") >>> post_summary = result.effect_summary(period="post") - - Notes - ----- - For Bayesian models, the causal impact is calculated using the posterior expectation - (``mu``) rather than the posterior predictive (``y_hat``). This means the impact and - its uncertainty represent the systematic causal effect, excluding observation-level - noise. The uncertainty bands in the plots reflect parameter uncertainty and - counterfactual prediction uncertainty, but not individual observation variability. - - The three-period design is useful for analyzing temporary interventions such as: - - - Marketing campaigns with defined start and end dates - - Policy trials or pilot programs - - Clinical treatments with limited duration - - Seasonal interventions - - Use ``effect_summary(period="intervention")`` to analyze effects during the - intervention, and ``effect_summary(period="post")`` to analyze effect persistence - after the intervention ends. """ supports_ols = True diff --git a/causalpy/experiments/piecewise_its.py b/causalpy/experiments/piecewise_its.py index 02e1e1e1a..3764702ad 100644 --- a/causalpy/experiments/piecewise_its.py +++ b/causalpy/experiments/piecewise_its.py @@ -81,6 +81,23 @@ class PiecewiseITS(BaseExperiment): cumulative_effect : xr.DataArray or np.ndarray Cumulative causal effect over time. + Notes + ----- + The counterfactual is computed by setting all step/ramp terms to zero, + representing what would have happened without the interventions. + + The `step` and `ramp` transforms are patsy stateful transforms that handle + both numeric and datetime time columns. For datetime, thresholds can be + specified as strings (e.g., '2020-06-01') or pd.Timestamp objects. + + References + ---------- + - Wagner AK, et al. 
(2002). Segmented regression analysis of interrupted + time series studies in medication use research. Journal of Clinical + Pharmacy and Therapeutics. + - Lopez Bernal J, et al. (2017). Interrupted time series regression for + the evaluation of public health interventions: a tutorial. Int J Epidemiol. + Examples -------- >>> import causalpy as cp @@ -122,23 +139,6 @@ class PiecewiseITS(BaseExperiment): ... formula="y ~ 1 + date + step(date, '2020-02-20') + ramp(date, '2020-02-20')", ... model=..., ... ) # doctest: +SKIP - - Notes - ----- - The counterfactual is computed by setting all step/ramp terms to zero, - representing what would have happened without the interventions. - - The `step` and `ramp` transforms are patsy stateful transforms that handle - both numeric and datetime time columns. For datetime, thresholds can be - specified as strings (e.g., '2020-06-01') or pd.Timestamp objects. - - References - ---------- - - Wagner AK, et al. (2002). Segmented regression analysis of interrupted - time series studies in medication use research. Journal of Clinical - Pharmacy and Therapeutics. - - Lopez Bernal J, et al. (2017). Interrupted time series regression for - the evaluation of public health interventions: a tutorial. Int J Epidemiol. """ supports_ols = True diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index 5f49f420c..003d56056 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -173,6 +173,11 @@ def priors_from_data(self, X, y) -> dict[str, Any]: Dictionary mapping parameter names to Prior objects. The keys should match parameter names used in the model's `build_model()` method. + See Also + -------- + WeightedSumFitter.priors_from_data : Example implementation that sets + Dirichlet prior shape based on number of control units. + Notes ----- The base implementation returns an empty dictionary, meaning no @@ -211,11 +216,6 @@ def priors_from_data(self, X, y) -> dict[str, Any]: ... dims=["treated_units", "coeffs"], ... 
) ... } - - See Also - -------- - WeightedSumFitter.priors_from_data : Example implementation that sets - Dirichlet prior shape based on number of control units. """ return {} diff --git a/causalpy/variable_selection_priors.py b/causalpy/variable_selection_priors.py index 5cc304847..b4be7f793 100644 --- a/causalpy/variable_selection_priors.py +++ b/causalpy/variable_selection_priors.py @@ -84,8 +84,8 @@ class SpikeAndSlabPrior: dims : str or tuple, optional Dimension names for the coefficient vector - Example - ------- + Examples + -------- >>> import pymc as pm >>> from causalpy.variable_selection_priors import SpikeAndSlabPrior >>> spike_slab = SpikeAndSlabPrior(dims="features") @@ -164,8 +164,8 @@ class HorseshoePrior: dims : str or tuple, optional Dimension names for the coefficient vector - Example - ------- + Examples + -------- >>> import pymc as pm >>> from causalpy.variable_selection_priors import HorseshoePrior >>> horseshoe = HorseshoePrior(dims="features") @@ -269,8 +269,8 @@ class VariableSelectionPrior: - mu: float or array (default=0) - Prior mean - sigma: float or array (default=1) - Prior SD - Example - ------- + Examples + -------- >>> import pymc as pm >>> from causalpy.variable_selection_priors import VariableSelectionPrior >>> # Create spike-and-slab prior @@ -379,8 +379,8 @@ def create_prior( PyMC variable The coefficient vector with the specified prior - Example - ------- + Examples + -------- >>> import pymc as pm >>> import pandas as pd >>> from causalpy.variable_selection_priors import VariableSelectionPrior diff --git a/pyproject.toml b/pyproject.toml index 8436691ed..237f94ef4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -179,6 +179,7 @@ checks = [ "PR10", # Parameter requires a space before the colon "RT03", # Return value has no description "SS03", # Summary does not end with a period + "GL07", # Sections are in the wrong order ] exclude = ['\._'] From 1f430136e639c10abaea7e6b849f342834a3d53b Mon Sep 17 00:00:00 2001 From: 
"Benjamin T. Vincent" Date: Fri, 1 May 2026 21:06:30 +0100 Subject: [PATCH 8/8] Enable GL08 and replace interrogate with numpydoc-validation (#898) Add ``GL08`` (object has no docstring) to the numpydoc-validation check set so missing docstrings are caught the same way every other rule is, and retire the standalone ``interrogate`` hook now that ``numpydoc-validation`` covers the same ground (and more) for the public API. Concretely: - Drop the ``interrogate`` pre-commit hook, the ``[tool.interrogate]`` section in ``pyproject.toml``, the ``interrogate`` extras entries, the ``interrogate-badge`` workflow, the ``Makefile`` invocation, and the README badge. - Add a top-level package docstring in ``causalpy/__init__.py`` and add docstrings to the abstract ``fit``/``build_model`` stubs and the concrete ``calculate_cumulative_impact`` method, none of which previously had one. - Rename the nested ``plot_weights``/``make_hists`` helpers in ``InversePropensityWeighting.plot_ate`` to a leading underscore so they fall under the configured private exclude. 
Co-authored-by: Cursor --- .github/workflows/interrogate-badge.yml | 54 ------------------- .pre-commit-config.yaml | 7 --- Makefile | 1 - README.md | 1 - causalpy/__init__.py | 1 + causalpy/experiments/base.py | 18 +++++++ .../inverse_propensity_weighting.py | 10 ++-- causalpy/pymc_models.py | 33 ++++++++++++ environment.yml | 1 - pyproject.toml | 25 +-------- 10 files changed, 59 insertions(+), 92 deletions(-) delete mode 100644 .github/workflows/interrogate-badge.yml diff --git a/.github/workflows/interrogate-badge.yml b/.github/workflows/interrogate-badge.yml deleted file mode 100644 index f083afd0a..000000000 --- a/.github/workflows/interrogate-badge.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: interrogate-badge - -on: - push: - branches: [main] - paths: - - "causalpy/**" - - "pyproject.toml" - workflow_dispatch: - -permissions: {} - -jobs: - generate: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: "3.14" - - - name: Install interrogate - run: pip install interrogate==1.7.0 - - - name: Generate badge - run: interrogate causalpy/ --generate-badge . 
--badge-format svg - - - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: interrogate-badge - path: interrogate_badge.svg - - publish: - needs: generate - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: # zizmor: ignore[artipacked] only git operations, no third-party code - ref: interrogate-badges - - - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: interrogate-badge - - - uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v7.1.0 - with: - commit_message: Update interrogate badge - branch: interrogate-badges - file_pattern: interrogate_badge.svg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ce79564cf..5ba2ce023 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,13 +42,6 @@ repos: # Run the formatter - id: ruff-format types_or: [ python, pyi, jupyter ] - - repo: https://github.com/econchick/interrogate - rev: 1.7.0 - hooks: - - id: interrogate - # needed to make excludes in pyproject.toml work - # see here https://github.com/econchick/interrogate/issues/60#issuecomment-735436566 - pass_filenames: false - repo: https://github.com/codespell-project/codespell rev: v2.4.2 hooks: diff --git a/Makefile b/Makefile index 59ed2c943..946f9c7f5 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,6 @@ lint: ## Run ruff linter and formatter check_lint: ## Check code formatting and linting without making changes ruff check . ruff format --diff --check . - interrogate . 
doctest: ## Run doctests for the causalpy module python -m pytest --doctest-modules --ignore=causalpy/tests/ causalpy/ --config-file=causalpy/tests/conftest.py diff --git a/README.md b/README.md index 2a6af9306..000add343 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ ![Build Status](https://github.com/pymc-labs/CausalPy/actions/workflows/ci.yml/badge.svg?branch=main) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) -![Interrogate](https://raw.githubusercontent.com/pymc-labs/CausalPy/interrogate-badges/interrogate_badge.svg) [![codecov](https://codecov.io/gh/pymc-labs/CausalPy/branch/main/graph/badge.svg?token=FDKNAY5CZ9)](https://codecov.io/gh/pymc-labs/CausalPy) ![GitHub Repo stars](https://img.shields.io/github/stars/pymc-labs/causalpy?style=flat) diff --git a/causalpy/__init__.py b/causalpy/__init__.py index e8b0e0aa9..40110bae0 100644 --- a/causalpy/__init__.py +++ b/causalpy/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""CausalPy: causal inference for quasi-experiments in Python.""" import causalpy.checks as checks # noqa: E402 import causalpy.pymc_models as pymc_models diff --git a/causalpy/experiments/base.py b/causalpy/experiments/base.py index b217436d2..f96e93988 100644 --- a/causalpy/experiments/base.py +++ b/causalpy/experiments/base.py @@ -142,6 +142,24 @@ def __init__(self, model: PyMCModel | RegressorMixin | None = None) -> None: raise ValueError("OLS models not supported.") def fit(self, *args: Any, **kwargs: Any) -> None: + """Fit the underlying model. 
+ + Subclasses must override this hook to delegate to their concrete + fitting routine; the base class only provides the abstract entry + point. + + Parameters + ---------- + *args : Any + Positional arguments forwarded to the subclass implementation. + **kwargs : Any + Keyword arguments forwarded to the subclass implementation. + + Raises + ------ + NotImplementedError + Always, when called on the base class. + """ raise NotImplementedError("fit method not implemented") @property diff --git a/causalpy/experiments/inverse_propensity_weighting.py b/causalpy/experiments/inverse_propensity_weighting.py index 06aa70c54..85fb4d459 100644 --- a/causalpy/experiments/inverse_propensity_weighting.py +++ b/causalpy/experiments/inverse_propensity_weighting.py @@ -593,7 +593,7 @@ def plot_ate( if method is None: method = self.weighting_scheme - def plot_weights(bins, top0, top1, ax, color="population"): + def _plot_weights(bins, top0, top1, ax, color="population"): colors_dict = { "population": ["orange", "skyblue", 0.6], "pseudo_population": ["grey", "grey", 0.1], @@ -619,7 +619,7 @@ def plot_weights(bins, top0, top1, ax, color="population"): for bar in bars: bar.set_edgecolor("black") - def make_hists(idata, i, axs, method=method): + def _make_hists(idata, i, axs, method=method): p_i = az.extract(idata)["p"][:, i].values if method == "raw": weight0 = 1 / (1 - p_i[self.t.flatten() == 0]) @@ -636,14 +636,14 @@ def make_hists(idata, i, axs, method=method): bins = np.arange(0.025, 0.99, 0.005) top0, _ = np.histogram(p_i[self.t.flatten() == 0], bins=bins) top1, _ = np.histogram(p_i[self.t.flatten() == 1], bins=bins) - plot_weights(bins, top0, top1, axs[0]) + _plot_weights(bins, top0, top1, axs[0]) top0, _ = np.histogram( p_i[self.t.flatten() == 0], bins=bins, weights=weight0 ) top1, _ = np.histogram( p_i[self.t.flatten() == 1], bins=bins, weights=weight1 ) - plot_weights(bins, top0, top1, axs[0], color="pseudo_population") + _plot_weights(bins, top0, top1, axs[0], 
color="pseudo_population") mosaic = """AAAAAA BBBBCC""" @@ -674,7 +674,7 @@ def make_hists(idata, i, axs, method=method): ["Treatment PS", "Control PS", "Weighted Pseudo Population", "Extreme PS"], ) - [make_hists(idata, i, axs) for i in range(prop_draws)] + [_make_hists(idata, i, axs) for i in range(prop_draws)] ate_df = pd.DataFrame( [self.get_ate(i, idata, method=method) for i in range(ate_draws)], columns=["ATE", "Y(1)", "Y(0)"], diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index 003d56056..3e27291bb 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -257,6 +257,26 @@ def _clone(self) -> "PyMCModel": def build_model( self, X: xr.DataArray, y: xr.DataArray, coords: dict[str, Any] | None ) -> None: + """Construct the PyMC model graph. + + Subclasses must override this method to declare priors, deterministic + nodes, and the likelihood for the model. + + Parameters + ---------- + X : xarray.DataArray + Input features with dimensions ``["obs_ind", "coeffs"]``. + y : xarray.DataArray + Target variable with dimensions ``["obs_ind", "treated_units"]``. + coords : dict or None + Mapping of named dimensions to coordinate labels for the + underlying ``pm.Model``. + + Raises + ------ + NotImplementedError + Always, when called on the base class. + """ raise NotImplementedError( "This method must be implemented by a subclass" ) # pragma: no cover @@ -483,6 +503,19 @@ def calculate_impact( return impact.transpose(..., "obs_ind") def calculate_cumulative_impact(self, impact: xr.DataArray) -> xr.DataArray: + """Cumulative sum of pointwise causal impact along ``obs_ind``. + + Parameters + ---------- + impact : xarray.DataArray + Pointwise causal impact, typically the output of + :meth:`calculate_impact`. + + Returns + ------- + xarray.DataArray + Cumulative impact accumulated along the ``obs_ind`` dimension. 
+ """ return impact.cumsum(dim="obs_ind") def print_coefficients( diff --git a/environment.yml b/environment.yml index 638cab298..cfe114d7d 100644 --- a/environment.yml +++ b/environment.yml @@ -15,7 +15,6 @@ dependencies: - arviz<1.0,>=0.14.0 - codespell - graphviz - - interrogate - ipython!=8.7.0 - jinja2 - make diff --git a/pyproject.toml b/pyproject.toml index 237f94ef4..c2dcedd57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,6 @@ dependencies = [ dev = [ "prek", "twine", - "interrogate", "codespell", "nbformat", "nbconvert", @@ -96,7 +95,7 @@ docs = [ "sphinx-design", "sphinx-togglebutton", ] -lint = ["interrogate", "prek", "ruff", "mypy"] +lint = ["prek", "ruff", "mypy"] test = ["pytest", "pytest-cov", "codespell", "nbformat", "nbconvert", "papermill"] [tool.pyproject2conda] @@ -124,27 +123,6 @@ markers = [ "slow: mark test as slow.", ] -[tool.interrogate] -ignore-init-method = true -ignore-init-module = true -ignore-magic = false -ignore-semiprivate = false -ignore-private = false -ignore-property-decorators = false -ignore-module = false -ignore-nested-functions = false -ignore-nested-classes = true -ignore-setters = false -fail-under = 85 -exclude = ["setup.py", "docs", "build", "dist"] -ignore-regex = ["^get$", "^mock_.*", ".*BaseClass.*"] -# possible values: 0 (minimal output), 1 (-v), 2 (-vv) -verbose = 1 -quiet = false -whitelist-regex = [] -color = true -omit-covered-files = false - [tool.ruff.format] docstring-code-format = true @@ -180,6 +158,7 @@ checks = [ "RT03", # Return value has no description "SS03", # Summary does not end with a period "GL07", # Sections are in the wrong order + "GL08", # Object does not have a docstring ] exclude = ['\._']