diff --git a/.github/workflows/interrogate-badge.yml b/.github/workflows/interrogate-badge.yml deleted file mode 100644 index f083afd0a..000000000 --- a/.github/workflows/interrogate-badge.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: interrogate-badge - -on: - push: - branches: [main] - paths: - - "causalpy/**" - - "pyproject.toml" - workflow_dispatch: - -permissions: {} - -jobs: - generate: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: "3.14" - - - name: Install interrogate - run: pip install interrogate==1.7.0 - - - name: Generate badge - run: interrogate causalpy/ --generate-badge . --badge-format svg - - - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: interrogate-badge - path: interrogate_badge.svg - - publish: - needs: generate - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: # zizmor: ignore[artipacked] only git operations, no third-party code - ref: interrogate-badges - - - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: interrogate-badge - - - uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v7.1.0 - with: - commit_message: Update interrogate badge - branch: interrogate-badges - file_pattern: interrogate_badge.svg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a5f7d5c68..5ba2ce023 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,13 +42,6 @@ repos: # Run the formatter - id: ruff-format types_or: [ python, pyi, jupyter ] - - repo: https://github.com/econchick/interrogate - rev: 1.7.0 - hooks: - - id: interrogate - # needed to make excludes in pyproject.toml work - # see 
here https://github.com/econchick/interrogate/issues/60#issuecomment-735436566 - pass_filenames: false - repo: https://github.com/codespell-project/codespell rev: v2.4.2 hooks: @@ -56,17 +49,20 @@ repos: additional_dependencies: # Support pyproject.toml configuration - tomli - # Validate that every public ``plot()`` override on a BaseExperiment subclass - # keeps its numpydoc Parameters block in sync with the function signature - # (issue #886). Configuration lives under [tool.numpydoc_validation] in - # pyproject.toml and is intentionally narrow: only ``.plot`` methods are - # checked, only PR01/PR02 are enforced, and the base ``BaseExperiment.plot`` - # is excluded because it uses ``*args, **kwargs`` for dispatch. + # Validate numpydoc-style docstrings across the package. The check set and + # exclude regex are configured under [tool.numpydoc_validation] in + # pyproject.toml. The hook was introduced under #886 as a narrow check on + # public ``.plot`` overrides; its scope was widened under #898 to cover the + # whole ``causalpy`` package. The ``files`` pattern below excludes the test + # suite and the simulated-data helpers in ``causalpy/data/`` (those use the + # Sphinx ``:param:`` style for didactic clarity and are not part of the + # public API surface). Private members are excluded via the config-level + # regex in ``pyproject.toml``. - repo: https://github.com/numpy/numpydoc rev: v1.10.0 hooks: - id: numpydoc-validation - files: ^causalpy/experiments/.*\.py$ + files: ^causalpy/(?!tests/|data/).*\.py$ - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.20.1 hooks: diff --git a/Makefile b/Makefile index 59ed2c943..946f9c7f5 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,6 @@ lint: ## Run ruff linter and formatter check_lint: ## Check code formatting and linting without making changes ruff check . ruff format --diff --check . - interrogate .
doctest: ## Run doctests for the causalpy module python -m pytest --doctest-modules --ignore=causalpy/tests/ causalpy/ --config-file=causalpy/tests/conftest.py diff --git a/README.md b/README.md index 2a6af9306..000add343 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ ![Build Status](https://github.com/pymc-labs/CausalPy/actions/workflows/ci.yml/badge.svg?branch=main) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) -![Interrogate](https://raw.githubusercontent.com/pymc-labs/CausalPy/interrogate-badges/interrogate_badge.svg) [![codecov](https://codecov.io/gh/pymc-labs/CausalPy/branch/main/graph/badge.svg?token=FDKNAY5CZ9)](https://codecov.io/gh/pymc-labs/CausalPy) ![GitHub Repo stars](https://img.shields.io/github/stars/pymc-labs/causalpy?style=flat) diff --git a/causalpy/__init__.py b/causalpy/__init__.py index e8b0e0aa9..40110bae0 100644 --- a/causalpy/__init__.py +++ b/causalpy/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""CausalPy: causal inference for quasi-experiments in Python.""" import causalpy.checks as checks # noqa: E402 import causalpy.pymc_models as pymc_models diff --git a/causalpy/checks/bandwidth.py b/causalpy/checks/bandwidth.py index a3d24ced7..6212e54c4 100644 --- a/causalpy/checks/bandwidth.py +++ b/causalpy/checks/bandwidth.py @@ -60,7 +60,13 @@ def __init__(self, bandwidths: list[float] | None = None) -> None: self.bandwidths = bandwidths or [0.25, 0.5, 1.0, 2.0, np.inf] def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is an RD or RKink instance.""" + """Verify the experiment is an RD or RKink instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, (RegressionDiscontinuity, RegressionKink)): raise TypeError( "BandwidthSensitivity requires a RegressionDiscontinuity " @@ -72,7 +78,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Re-fit the experiment at multiple bandwidths and compare estimates.""" + """Re-fit the experiment at multiple bandwidths and compare estimates. + + Parameters + ---------- + experiment : BaseExperiment + The fitted RD or RKink experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/checks/base.py b/causalpy/checks/base.py index dfb5f8036..dab08f936 100644 --- a/causalpy/checks/base.py +++ b/causalpy/checks/base.py @@ -37,6 +37,17 @@ def clone_model(model: Any) -> Any: PyMC models cannot survive ``copy.deepcopy`` (the class identity is lost), so we use their ``_clone()`` method instead. For all other model types we fall back to ``copy.deepcopy``. + + Parameters + ---------- + model : Any + The model instance to clone. 
PyMC models must expose a ``_clone()`` + method; everything else falls back to :func:`copy.deepcopy`. + + Returns + ------- + Any + A fresh, unfitted copy of ``model``. """ if hasattr(model, "_clone"): return model._clone() @@ -88,6 +99,12 @@ class Check(Protocol): def validate(self, experiment: BaseExperiment) -> None: """Verify the check is applicable to the given experiment. + Parameters + ---------- + experiment : BaseExperiment + The experiment instance whose type is checked against + ``applicable_methods``. + Raises ------ TypeError @@ -112,5 +129,7 @@ def run( Returns ------- CheckResult + Outcome of the check, including pass/fail status and any + diagnostic payload produced by the implementation. """ ... diff --git a/causalpy/checks/convex_hull.py b/causalpy/checks/convex_hull.py index 8a7e2349a..6e5fcca95 100644 --- a/causalpy/checks/convex_hull.py +++ b/causalpy/checks/convex_hull.py @@ -39,7 +39,13 @@ class ConvexHullCheck: applicable_methods: set[type[BaseExperiment]] = {SyntheticControl} def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a SyntheticControl instance.""" + """Verify the experiment is a SyntheticControl instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, SyntheticControl): raise TypeError("ConvexHullCheck requires a SyntheticControl experiment.") @@ -48,7 +54,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Run the convex hull violation check on pre-treatment data.""" + """Run the convex hull violation check on pre-treatment data. + + Parameters + ---------- + experiment : BaseExperiment + The fitted SyntheticControl experiment. + context : PipelineContext + Pipeline context (unused; required by the check protocol). 
+ """ sc = experiment datapre_control = sc.datapre_control # type: ignore[attr-defined] datapre_treated = sc.datapre_treated # type: ignore[attr-defined] diff --git a/causalpy/checks/leave_one_out.py b/causalpy/checks/leave_one_out.py index 2f20ac1e5..a38a63b4d 100644 --- a/causalpy/checks/leave_one_out.py +++ b/causalpy/checks/leave_one_out.py @@ -48,7 +48,13 @@ class LeaveOneOut: applicable_methods: set[type[BaseExperiment]] = {SyntheticControl} def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a SyntheticControl instance.""" + """Verify the experiment is a SyntheticControl instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, SyntheticControl): raise TypeError("LeaveOneOut requires a SyntheticControl experiment.") @@ -57,7 +63,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Drop each control unit in turn and compare effect estimates.""" + """Drop each control unit in turn and compare effect estimates. + + Parameters + ---------- + experiment : BaseExperiment + The fitted SyntheticControl experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/checks/mccrary.py b/causalpy/checks/mccrary.py index 31a68f504..9a6dfd722 100644 --- a/causalpy/checks/mccrary.py +++ b/causalpy/checks/mccrary.py @@ -62,7 +62,13 @@ def __init__(self, n_bins: int = 20, alpha: float = 0.05) -> None: self.alpha = alpha def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a RegressionDiscontinuity instance.""" + """Verify the experiment is a RegressionDiscontinuity instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. 
+ """ if not isinstance(experiment, RegressionDiscontinuity): raise TypeError( "McCraryDensityTest requires a RegressionDiscontinuity experiment." @@ -73,7 +79,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Test for manipulation of the running variable at the threshold.""" + """Test for manipulation of the running variable at the threshold. + + Parameters + ---------- + experiment : BaseExperiment + The fitted RegressionDiscontinuity experiment. + context : PipelineContext + Pipeline context (unused; required by the check protocol). + """ rd = experiment threshold = rd.treatment_threshold # type: ignore[attr-defined] running_var = rd.running_variable_name # type: ignore[attr-defined] diff --git a/causalpy/checks/persistence.py b/causalpy/checks/persistence.py index acd871888..9f1d887b3 100644 --- a/causalpy/checks/persistence.py +++ b/causalpy/checks/persistence.py @@ -58,7 +58,13 @@ def __init__( self.direction = direction def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a three-period ITS with treatment_end_time.""" + """Verify the experiment is a three-period ITS with treatment_end_time. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, InterruptedTimeSeries): raise TypeError( "PersistenceCheck requires an InterruptedTimeSeries experiment." @@ -77,7 +83,16 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Run persistence analysis and report whether the effect decays.""" + """Run persistence analysis and report whether the effect decays. + + Parameters + ---------- + experiment : BaseExperiment + The fitted three-period ITS experiment. + context : PipelineContext + Pipeline context (unused by this check; required by the + :class:`~causalpy.checks.base.Check` protocol). 
+ """ its: Any = experiment persistence = its.analyze_persistence( hdi_prob=self.hdi_prob, diff --git a/causalpy/checks/placebo_in_space.py b/causalpy/checks/placebo_in_space.py index 7ffb1281c..6e17b3950 100644 --- a/causalpy/checks/placebo_in_space.py +++ b/causalpy/checks/placebo_in_space.py @@ -50,7 +50,13 @@ class PlaceboInSpace: applicable_methods: set[type[BaseExperiment]] = {SyntheticControl} def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a SyntheticControl instance.""" + """Verify the experiment is a SyntheticControl instance. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, SyntheticControl): raise TypeError("PlaceboInSpace requires a SyntheticControl experiment.") @@ -59,7 +65,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Treat each control unit as treated and compare effect magnitudes.""" + """Treat each control unit as treated and compare effect magnitudes. + + Parameters + ---------- + experiment : BaseExperiment + The fitted SyntheticControl experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/checks/placebo_in_time.py b/causalpy/checks/placebo_in_time.py index 5730ddc1b..b63d0eb72 100644 --- a/causalpy/checks/placebo_in_time.py +++ b/causalpy/checks/placebo_in_time.py @@ -213,6 +213,11 @@ def __init__( def validate(self, experiment: BaseExperiment) -> None: """Check the experiment is compatible with PlaceboInTime. + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + Raises ------ TypeError @@ -553,6 +558,16 @@ def run( Can be used standalone (``context=None``) when ``experiment_factory`` was provided, or within a pipeline. 
+ Parameters + ---------- + experiment : BaseExperiment + The fitted experiment whose treatment time will be shifted to + generate placebo folds. + context : PipelineContext or None, default None + Pipeline context providing ``experiment_config`` for re-fits. + If ``None``, an explicit ``experiment_factory`` must have been + supplied at construction time. + Returns ------- CheckResult diff --git a/causalpy/checks/pre_treatment_placebo.py b/causalpy/checks/pre_treatment_placebo.py index 723cad9e5..7e872bbe0 100644 --- a/causalpy/checks/pre_treatment_placebo.py +++ b/causalpy/checks/pre_treatment_placebo.py @@ -49,7 +49,13 @@ def __init__(self, threshold: float = 0.05) -> None: self.threshold = threshold def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment is a fitted StaggeredDifferenceInDifferences.""" + """Verify the experiment is a fitted StaggeredDifferenceInDifferences. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment, StaggeredDifferenceInDifferences): raise TypeError( "PreTreatmentPlaceboCheck requires a " @@ -66,7 +72,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Evaluate pre-treatment event-study ATTs for evidence of pre-trends.""" + """Evaluate pre-treatment event-study ATTs for evidence of pre-trends. + + Parameters + ---------- + experiment : BaseExperiment + The fitted StaggeredDifferenceInDifferences experiment. + context : PipelineContext + Pipeline context (unused; required by the check protocol). 
+ """ sdid = experiment att_et = sdid.att_event_time_ # type: ignore[attr-defined] diff --git a/causalpy/checks/prior_sensitivity.py b/causalpy/checks/prior_sensitivity.py index 965c11932..38136bac8 100644 --- a/causalpy/checks/prior_sensitivity.py +++ b/causalpy/checks/prior_sensitivity.py @@ -93,7 +93,13 @@ def __init__(self, alternatives: list[dict[str, Any]]) -> None: self.alternatives = alternatives def validate(self, experiment: BaseExperiment) -> None: - """Verify the experiment uses a Bayesian (PyMC) model.""" + """Verify the experiment uses a Bayesian (PyMC) model. + + Parameters + ---------- + experiment : BaseExperiment + Candidate experiment to validate. + """ if not isinstance(experiment.model, PyMCModel): raise TypeError( "PriorSensitivity requires a Bayesian (PyMC) model. " @@ -105,7 +111,15 @@ def run( experiment: BaseExperiment, context: PipelineContext, ) -> CheckResult: - """Re-fit with each alternative model and compare effect estimates.""" + """Re-fit with each alternative model and compare effect estimates. + + Parameters + ---------- + experiment : BaseExperiment + The fitted Bayesian experiment. + context : PipelineContext + Pipeline context providing ``experiment_config`` for re-fits. + """ if context.experiment_config is None: raise RuntimeError( "No experiment_config in context. Use EstimateEffect " diff --git a/causalpy/custom_exceptions.py b/causalpy/custom_exceptions.py index f61170b9e..039e268ef 100644 --- a/causalpy/custom_exceptions.py +++ b/causalpy/custom_exceptions.py @@ -18,7 +18,13 @@ class BadIndexException(Exception): """Custom exception used when we have a mismatch in types between the dataframe - index and an event, typically a treatment or intervention.""" + index and an event, typically a treatment or intervention. + + Parameters + ---------- + message : str + Human-readable description of the index mismatch. 
+ """ def __init__(self, message: str): super().__init__(message) @@ -27,7 +33,13 @@ def __init__(self, message: str): class FormulaException(Exception): """Exception raised given when there is some error in a user-provided model - formula""" + formula. + + Parameters + ---------- + message : str + Human-readable description of the formula problem. + """ def __init__(self, message: str): super().__init__(message) @@ -35,7 +47,13 @@ def __init__(self, message: str): class DataException(Exception): - """Exception raised given when there is some error in user-provided dataframe""" + """Exception raised given when there is some error in user-provided dataframe. + + Parameters + ---------- + message : str + Human-readable description of the data problem. + """ def __init__(self, message: str): super().__init__(message) diff --git a/causalpy/experiments/__init__.py b/causalpy/experiments/__init__.py index 66b98eda1..b73902289 100644 --- a/causalpy/experiments/__init__.py +++ b/causalpy/experiments/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""CausalPy experiment module""" +"""CausalPy experiment module.""" from .diff_in_diff import DifferenceInDifferences from .instrumental_variable import InstrumentalVariable diff --git a/causalpy/experiments/base.py b/causalpy/experiments/base.py index 375873e56..f96e93988 100644 --- a/causalpy/experiments/base.py +++ b/causalpy/experiments/base.py @@ -98,6 +98,12 @@ class BaseExperiment(ABC): (e.g. ``LinearRegression``) so that ``model=None`` instantiates a sensible Bayesian default. To use an OLS/sklearn model, pass one explicitly. + Parameters + ---------- + model : PyMCModel, RegressorMixin, or None, default None + Model instance to use. If ``None`` and ``_default_model_class`` is set, + an instance of that default class is constructed. 
+ Notes ----- Optional ``maketables`` integration is exposed through ``__maketables_*`` @@ -136,6 +142,24 @@ def __init__(self, model: PyMCModel | RegressorMixin | None = None) -> None: raise ValueError("OLS models not supported.") def fit(self, *args: Any, **kwargs: Any) -> None: + """Fit the underlying model. + + Subclasses must override this hook to delegate to their concrete + fitting routine; the base class only provides the abstract entry + point. + + Parameters + ---------- + *args : Any + Positional arguments forwarded to the subclass implementation. + **kwargs : Any + Keyword arguments forwarded to the subclass implementation. + + Raises + ------ + NotImplementedError + Always, when called on the base class. + """ raise NotImplementedError("fit method not implemented") @property @@ -308,6 +332,13 @@ def get_plot_data(self, *args: Any, **kwargs: Any) -> pd.DataFrame: Internally, this function dispatches to either :func:`get_plot_data_bayesian` or :func:`get_plot_data_ols` depending on the model type. + + Parameters + ---------- + *args + Positional arguments forwarded to the model-specific implementation. + **kwargs + Keyword arguments forwarded to the model-specific implementation. """ if isinstance(self.model, PyMCModel): return self.get_plot_data_bayesian(*args, **kwargs) @@ -317,11 +348,27 @@ def get_plot_data(self, *args: Any, **kwargs: Any) -> pd.DataFrame: raise ValueError("Unsupported model type") def get_plot_data_bayesian(self, *args: Any, **kwargs: Any) -> pd.DataFrame: - """Return plot data for Bayesian models. Override in subclasses that support Bayesian.""" + """Return plot data for Bayesian models. Override in subclasses that support Bayesian. + + Parameters + ---------- + *args + Positional arguments forwarded to the subclass implementation. + **kwargs + Keyword arguments forwarded to the subclass implementation. 
+ """ raise NotImplementedError("get_plot_data_bayesian method not yet implemented") def get_plot_data_ols(self, *args: Any, **kwargs: Any) -> pd.DataFrame: - """Return plot data for OLS models. Override in subclasses that support OLS.""" + """Return plot data for OLS models. Override in subclasses that support OLS. + + Parameters + ---------- + *args + Positional arguments forwarded to the subclass implementation. + **kwargs + Keyword arguments forwarded to the subclass implementation. + """ raise NotImplementedError("get_plot_data_ols method not yet implemented") @abstractmethod @@ -383,6 +430,9 @@ def effect_summary( prefix : str, optional Prefix for prose generation (e.g., "During intervention", "Post-intervention"). Defaults to "Post-period". + **kwargs + Reserved for forward-compatibility; subclasses may consume + additional keyword arguments. Returns ------- diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py index 269b7e9bd..dffdbca88 100644 --- a/causalpy/experiments/diff_in_diff.py +++ b/causalpy/experiments/diff_in_diff.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Difference in differences -""" +"""Difference in differences.""" from typing import Any, Literal @@ -74,6 +72,8 @@ class DifferenceInDifferences(BaseExperiment): Defaults to "post_treatment". model : PyMCModel or RegressorMixin, optional A PyMC model for difference in differences. Defaults to LinearRegression. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. 
Example -------- @@ -261,7 +261,7 @@ def algorithm(self) -> None: raise ValueError("Model type not recognized") def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" # Validate formula structure and interaction interaction terms self._validate_formula_interaction_terms() # Check if post_treatment_variable_name is in formula @@ -320,8 +320,11 @@ def _validate_formula_interaction_terms(self) -> None: def summary(self, round_to: int | None = 2) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print(f"{self.expt_type:=^80}") print(f"Formula: {self.formula}") @@ -671,6 +674,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/instrumental_variable.py b/causalpy/experiments/instrumental_variable.py index e724f59a6..76ccde8ce 100644 --- a/causalpy/experiments/instrumental_variable.py +++ b/causalpy/experiments/instrumental_variable.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Instrumental variable regression -""" +"""Instrumental variable regression.""" import warnings # noqa: I001 @@ -65,6 +63,8 @@ class InstrumentalVariable(BaseExperiment): A indicator for whether the treatment to be modelled is binary or not. 
Determines which PyMC model we use to model the joint outcome and treatment. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -201,7 +201,7 @@ def algorithm(self) -> None: ) def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" treatment = self.instruments_formula.split("~")[0] test = treatment.strip() in self.instruments_data.columns test = test & (treatment.strip() in self.data.columns) @@ -224,8 +224,7 @@ def input_validation(self) -> None: ) def get_2SLS_fit(self) -> None: - """ - Two Stage Least Squares Fit + """Two Stage Least Squares Fit. This function is called by the experiment, results are used for priors if none are provided. @@ -246,8 +245,7 @@ def get_2SLS_fit(self) -> None: self.second_stage_reg = second_stage_reg def get_naive_OLS_fit(self) -> None: - """ - Naive Ordinary Least Squares + """Naive Ordinary Least Squares. This function is called by the experiment. """ @@ -291,8 +289,11 @@ def plot( def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ raise NotImplementedError("Summary method not implemented.") @@ -314,6 +315,29 @@ def effect_summary( Generate a decision-ready summary of causal effects. Note: effect_summary is not yet implemented for InstrumentalVariable experiments. + + Parameters + ---------- + window : str, tuple, or slice, default "post" + Time window for analysis (unused for InstrumentalVariable). + direction : {"increase", "decrease", "two-sided"}, default "increase" + Direction for tail probability calculation. 
+ alpha : float, default 0.05 + Significance level for HDI/CI intervals. + cumulative : bool, default True + Whether to include cumulative effect statistics. + relative : bool, default True + Whether to include relative effect statistics. + min_effect : float, optional + Region of Practical Equivalence (ROPE) threshold. + treated_unit : str, optional + For multi-unit experiments, the unit to analyse. + period : {"intervention", "post", "comparison"}, optional + Period selector for three-period designs. + prefix : str, default "Post-period" + Prefix for prose generation. + **kwargs + Reserved for forward-compatibility. """ raise NotImplementedError( "effect_summary is not yet implemented for InstrumentalVariable experiments." diff --git a/causalpy/experiments/interrupted_time_series.py b/causalpy/experiments/interrupted_time_series.py index 97311dbbd..149c81ede 100644 --- a/causalpy/experiments/interrupted_time_series.py +++ b/causalpy/experiments/interrupted_time_series.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Interrupted Time Series Analysis -""" +"""Interrupted Time Series Analysis.""" from typing import Any, Literal @@ -69,6 +67,25 @@ class InterruptedTimeSeries(BaseExperiment): **kwargs : dict Additional keyword arguments passed to the model. + Notes + ----- + For Bayesian models, the causal impact is calculated using the posterior expectation + (``mu``) rather than the posterior predictive (``y_hat``). This means the impact and + its uncertainty represent the systematic causal effect, excluding observation-level + noise. The uncertainty bands in the plots reflect parameter uncertainty and + counterfactual prediction uncertainty, but not individual observation variability. 
+ + The three-period design is useful for analyzing temporary interventions such as: + + - Marketing campaigns with defined start and end dates + - Policy trials or pilot programs + - Clinical treatments with limited duration + - Seasonal interventions + + Use ``effect_summary(period="intervention")`` to analyze effects during the + intervention, and ``effect_summary(period="post")`` to analyze effect persistence + after the intervention ends. + Examples -------- **Two-period design (permanent intervention):** @@ -105,25 +122,6 @@ class InterruptedTimeSeries(BaseExperiment): >>> # Get period-specific effect summaries >>> intervention_summary = result.effect_summary(period="intervention") >>> post_summary = result.effect_summary(period="post") - - Notes - ----- - For Bayesian models, the causal impact is calculated using the posterior expectation - (``mu``) rather than the posterior predictive (``y_hat``). This means the impact and - its uncertainty represent the systematic causal effect, excluding observation-level - noise. The uncertainty bands in the plots reflect parameter uncertainty and - counterfactual prediction uncertainty, but not individual observation variability. - - The three-period design is useful for analyzing temporary interventions such as: - - - Marketing campaigns with defined start and end dates - - Policy trials or pilot programs - - Clinical treatments with limited duration - - Seasonal interventions - - Use ``effect_summary(period="intervention")`` to analyze effects during the - intervention, and ``effect_summary(period="post")`` to analyze effect persistence - after the intervention ends. """ supports_ols = True @@ -262,7 +260,17 @@ def input_validation( treatment_time: int | float | pd.Timestamp, treatment_end_time: int | float | pd.Timestamp | None = None, ) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness. 
+ + Parameters + ---------- + data : pd.DataFrame + The experiment data. + treatment_time : int, float, or pd.Timestamp + Start of the treatment period. + treatment_end_time : int, float, pd.Timestamp, or None, default None + Optional end of the treatment period for three-period designs. + """ if isinstance(data.index, pd.DatetimeIndex) and not isinstance( treatment_time, pd.Timestamp ): @@ -591,8 +599,11 @@ def _comparison_period_summary( def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print(f"{self.expt_type:=^80}") print(f"Formula: {self.formula}") @@ -965,8 +976,11 @@ def get_plot_data_bayesian(self, hdi_prob: float = HDI_PROB) -> pd.DataFrame: """ Recover the data of the experiment along with the prediction and causal impact information. - :param hdi_prob: - Prob for which the highest density interval will be computed. The default value is defined as the default from the :func:`arviz.hdi` function. + Parameters + ---------- + hdi_prob : float, default :data:`~causalpy.constants.HDI_PROB` + Probability mass of the highest density interval. Defaults to the + project-wide :data:`~causalpy.constants.HDI_PROB` (currently 0.94). """ if isinstance(self.model, PyMCModel): hdi_pct = int(round(hdi_prob * 100)) @@ -1321,6 +1335,9 @@ def effect_summary( prefix : str, optional Prefix for prose generation (e.g., "During intervention", "Post-intervention"). Defaults to "Post-period". + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. 
Returns ------- diff --git a/causalpy/experiments/inverse_propensity_weighting.py b/causalpy/experiments/inverse_propensity_weighting.py index 0c92b2ba5..85fb4d459 100644 --- a/causalpy/experiments/inverse_propensity_weighting.py +++ b/causalpy/experiments/inverse_propensity_weighting.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Inverse propensity weighting -""" +"""Inverse propensity weighting.""" import warnings from typing import Any, Literal @@ -51,6 +49,8 @@ class InversePropensityWeighting(BaseExperiment): of these weighting schemes. model : PropensityScore, optional A PyMC model. Defaults to PropensityScore. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -593,7 +593,7 @@ def plot_ate( if method is None: method = self.weighting_scheme - def plot_weights(bins, top0, top1, ax, color="population"): + def _plot_weights(bins, top0, top1, ax, color="population"): colors_dict = { "population": ["orange", "skyblue", 0.6], "pseudo_population": ["grey", "grey", 0.1], @@ -619,7 +619,7 @@ def plot_weights(bins, top0, top1, ax, color="population"): for bar in bars: bar.set_edgecolor("black") - def make_hists(idata, i, axs, method=method): + def _make_hists(idata, i, axs, method=method): p_i = az.extract(idata)["p"][:, i].values if method == "raw": weight0 = 1 / (1 - p_i[self.t.flatten() == 0]) @@ -636,14 +636,14 @@ def make_hists(idata, i, axs, method=method): bins = np.arange(0.025, 0.99, 0.005) top0, _ = np.histogram(p_i[self.t.flatten() == 0], bins=bins) top1, _ = np.histogram(p_i[self.t.flatten() == 1], bins=bins) - plot_weights(bins, top0, top1, axs[0]) + _plot_weights(bins, top0, top1, axs[0]) top0, _ = np.histogram( p_i[self.t.flatten() == 0], bins=bins, weights=weight0 ) top1, _ = np.histogram( p_i[self.t.flatten() == 1], bins=bins, weights=weight1 ) - 
plot_weights(bins, top0, top1, axs[0], color="pseudo_population") + _plot_weights(bins, top0, top1, axs[0], color="pseudo_population") mosaic = """AAAAAA BBBBCC""" @@ -674,7 +674,7 @@ def make_hists(idata, i, axs, method=method): ["Treatment PS", "Control PS", "Weighted Pseudo Population", "Extreme PS"], ) - [make_hists(idata, i, axs) for i in range(prop_draws)] + [_make_hists(idata, i, axs) for i in range(prop_draws)] ate_df = pd.DataFrame( [self.get_ate(i, idata, method=method) for i in range(ate_draws)], columns=["ATE", "Y(1)", "Y(0)"], diff --git a/causalpy/experiments/panel_regression.py b/causalpy/experiments/panel_regression.py index 0ec4fc472..5a7fa07b1 100644 --- a/causalpy/experiments/panel_regression.py +++ b/causalpy/experiments/panel_regression.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Panel Regression with Fixed Effects -""" +"""Panel Regression with Fixed Effects.""" from typing import Any, Literal @@ -68,6 +66,8 @@ class PanelRegression(BaseExperiment): but doesn't directly estimate individual unit effects. model : PyMCModel or RegressorMixin, optional A PyMC (Bayesian) or sklearn (OLS) model. If None, a model must be provided. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Attributes ---------- @@ -468,6 +468,29 @@ def effect_summary( so the standard ITS/SC-style effect summary does not directly apply. Use :meth:`summary` for coefficient-level inference. + Parameters + ---------- + window : str, tuple, or slice, default "post" + Time window for analysis (placeholder; not consumed). + direction : {"increase", "decrease", "two-sided"}, default "increase" + Direction for tail probability calculation. + alpha : float, default 0.05 + Significance level for HDI/CI intervals. + cumulative : bool, default True + Whether to include cumulative effect statistics. 
+ relative : bool, default True + Whether to include relative effect statistics. + min_effect : float, optional + Region of Practical Equivalence (ROPE) threshold. + treated_unit : str, optional + Treated unit selector for multi-unit experiments. + period : {"intervention", "post", "comparison"}, optional + Period selector for three-period designs. + prefix : str, default "Post-period" + Prefix for prose generation. + **kwargs + Reserved for forward-compatibility. + Raises ------ NotImplementedError @@ -604,6 +627,12 @@ def _plot_coefficients_internal( def get_plot_data_bayesian(self, **kwargs: Any) -> pd.DataFrame: """Get plot data for Bayesian model. + Parameters + ---------- + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. + Returns ------- pd.DataFrame @@ -636,6 +665,12 @@ def get_plot_data_bayesian(self, **kwargs: Any) -> pd.DataFrame: def get_plot_data_ols(self, **kwargs: Any) -> pd.DataFrame: """Get plot data for OLS model. + Parameters + ---------- + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. + Returns ------- pd.DataFrame diff --git a/causalpy/experiments/piecewise_its.py b/causalpy/experiments/piecewise_its.py index 99e33784d..3764702ad 100644 --- a/causalpy/experiments/piecewise_its.py +++ b/causalpy/experiments/piecewise_its.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Piecewise Interrupted Time Series Analysis (Segmented Regression) -""" +"""Piecewise Interrupted Time Series Analysis (Segmented Regression).""" import re from typing import Any, Literal @@ -83,6 +81,23 @@ class PiecewiseITS(BaseExperiment): cumulative_effect : xr.DataArray or np.ndarray Cumulative causal effect over time. 
+ Notes + ----- + The counterfactual is computed by setting all step/ramp terms to zero, + representing what would have happened without the interventions. + + The `step` and `ramp` transforms are patsy stateful transforms that handle + both numeric and datetime time columns. For datetime, thresholds can be + specified as strings (e.g., '2020-06-01') or pd.Timestamp objects. + + References + ---------- + - Wagner AK, et al. (2002). Segmented regression analysis of interrupted + time series studies in medication use research. Journal of Clinical + Pharmacy and Therapeutics. + - Lopez Bernal J, et al. (2017). Interrupted time series regression for + the evaluation of public health interventions: a tutorial. Int J Epidemiol. + Examples -------- >>> import causalpy as cp @@ -124,23 +139,6 @@ class PiecewiseITS(BaseExperiment): ... formula="y ~ 1 + date + step(date, '2020-02-20') + ramp(date, '2020-02-20')", ... model=..., ... ) # doctest: +SKIP - - Notes - ----- - The counterfactual is computed by setting all step/ramp terms to zero, - representing what would have happened without the interventions. - - The `step` and `ramp` transforms are patsy stateful transforms that handle - both numeric and datetime time columns. For datetime, thresholds can be - specified as strings (e.g., '2020-06-01') or pd.Timestamp objects. - - References - ---------- - - Wagner AK, et al. (2002). Segmented regression analysis of interrupted - time series studies in medication use research. Journal of Clinical - Pharmacy and Therapeutics. - - Lopez Bernal J, et al. (2017). Interrupted time series regression for - the evaluation of public health interventions: a tutorial. Int J Epidemiol. """ supports_ols = True @@ -813,7 +811,31 @@ def effect_summary( prefix: str = "Post-period", **kwargs: Any, ) -> EffectSummary: - """Generate a decision-ready summary of PiecewiseITS causal effects.""" + """Generate a decision-ready summary of PiecewiseITS causal effects. 
+ + Parameters + ---------- + window : str, tuple, or slice, default "post" + Time window for analysis (see :meth:`BaseExperiment.effect_summary`). + direction : {"increase", "decrease", "two-sided"}, default "increase" + Direction for tail probability calculation (PyMC only). + alpha : float, default 0.05 + Significance level for HDI/CI intervals (1-alpha confidence). + cumulative : bool, default True + Whether to include cumulative effect statistics. + relative : bool, default True + Whether to include relative effect statistics. + min_effect : float, optional + Region of Practical Equivalence (ROPE) threshold (PyMC only). + treated_unit : str, optional + For multi-unit experiments, selects which unit to analyse. + period : None + Not supported by PiecewiseITS; pass ``None``. + prefix : str, default "Post-period" + Prefix for prose generation. + **kwargs + Reserved for forward-compatibility. + """ from causalpy.reporting import ( _compute_statistics, _compute_statistics_ols, diff --git a/causalpy/experiments/prepostnegd.py b/causalpy/experiments/prepostnegd.py index 3c5fda665..8bbc808cf 100644 --- a/causalpy/experiments/prepostnegd.py +++ b/causalpy/experiments/prepostnegd.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Pretest/posttest nonequivalent group design -""" +"""Pretest/posttest nonequivalent group design.""" from typing import Any, Literal @@ -40,19 +38,23 @@ class PrePostNEGD(BaseExperiment): """ - A class to analyse data from pretest/posttest designs - - :param data: - A pandas dataframe - :param formula: - A statistical model formula - :param group_variable_name: - Name of the column in data for the group variable, should be either - binary or boolean - :param pretreatment_variable_name: - Name of the column in data for the pretreatment variable - :param model: - A PyMC model.
Defaults to LinearRegression. + A class to analyse data from pretest/posttest designs. + + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. + formula : str + A statistical model formula. + group_variable_name : str + Name of the column in ``data`` for the group variable; should be + either binary or boolean. + pretreatment_variable_name : str + Name of the column in ``data`` for the pretreatment variable. + model : PyMCModel, optional + A PyMC model. Defaults to :class:`LinearRegression`. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -189,7 +191,7 @@ def algorithm(self) -> None: ) def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" if not _is_variable_dummy_coded(self.data[self.group_variable_name]): raise DataException( f""" @@ -226,8 +228,11 @@ def _causal_impact_summary_stat(self, round_to: int | None = 2) -> str: def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print(f"{self.expt_type:=^80}") print(f"Formula: {self.formula}") @@ -385,6 +390,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. 
Returns ------- diff --git a/causalpy/experiments/regression_discontinuity.py b/causalpy/experiments/regression_discontinuity.py index 516c9d731..56fd157a8 100644 --- a/causalpy/experiments/regression_discontinuity.py +++ b/causalpy/experiments/regression_discontinuity.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Regression discontinuity design -""" +"""Regression discontinuity design.""" import warnings # noqa: I001 from typing import Any, Literal @@ -46,28 +44,33 @@ class RegressionDiscontinuity(BaseExperiment): """ A class to analyse sharp regression discontinuity experiments. - :param data: - A pandas dataframe - :param formula: - A statistical model formula - :param treatment_threshold: - A scalar threshold value at which the treatment is applied - :param model: - A PyMC or sklearn model. Defaults to LinearRegression. - :param running_variable_name: - The name of the predictor variable that the treatment threshold is based upon - :param epsilon: - A small scalar value which determines how far above and below the treatment - threshold to evaluate the causal impact. - :param bandwidth: - Data outside of the bandwidth (relative to the discontinuity) is not used to fit - the model. - :param donut_hole: - Observations within this distance from the treatment threshold are excluded from - model fitting. Used as a robustness check when observations closest to the - threshold may be problematic (e.g., due to manipulation or heaping). Defaults - to 0.0 (no exclusion). Must be non-negative and less than bandwidth if bandwidth - is finite. + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. + formula : str + A statistical model formula. + treatment_threshold : float + A scalar threshold value at which the treatment is applied. 
+ model : PyMCModel, RegressorMixin, or None, default None + A PyMC or sklearn model. Defaults to :class:`LinearRegression`. + running_variable_name : str, default "x" + The name of the predictor variable that the treatment threshold is + based upon. + epsilon : float, default 0.001 + A small scalar value which determines how far above and below the + treatment threshold to evaluate the causal impact. + bandwidth : float, default np.inf + Data outside of the bandwidth (relative to the discontinuity) is not + used to fit the model. + donut_hole : float, default 0.0 + Observations within this distance from the treatment threshold are + excluded from model fitting. Used as a robustness check when + observations closest to the threshold may be problematic (e.g., due + to manipulation or heaping). Must be non-negative and less than + ``bandwidth`` if ``bandwidth`` is finite. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Example -------- @@ -240,7 +243,7 @@ def algorithm(self) -> None: # ****************************************************************************** def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" if "treated" not in self.formula: raise FormulaException( "A predictor called `treated` should be in the formula" @@ -278,10 +281,13 @@ def _is_treated(self, x: np.ndarray | pd.Series) -> np.ndarray: def summary(self, round_to: int | None = None) -> None: """ - Print summary of main results and model coefficients + Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers. + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. 
""" print("Regression Discontinuity experiment") print(f"Formula: {self.formula}") @@ -546,6 +552,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/regression_kink.py b/causalpy/experiments/regression_kink.py index b037af56a..7b5e47634 100644 --- a/causalpy/experiments/regression_kink.py +++ b/causalpy/experiments/regression_kink.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Regression kink design -""" +"""Regression kink design.""" import warnings # noqa: I001 @@ -44,21 +42,25 @@ class RegressionKink(BaseExperiment): """A class to analyse regression kink designs. - :param data: - A pandas dataframe - :param formula: - A statistical model formula - :param kink_point: - A scalar value at which the kink occurs - :param model: - A PyMC model. Defaults to LinearRegression. - :param running_variable_name: - The name of the running variable column - :param epsilon: - A small scalar for evaluating the causal impact above/below the kink - :param bandwidth: - Data outside of the bandwidth (relative to the kink) is not used to fit - the model. + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. + formula : str + A statistical model formula. + kink_point : float + A scalar value at which the kink occurs. + model : PyMCModel, optional + A PyMC model. Defaults to :class:`LinearRegression`. + running_variable_name : str, default "x" + The name of the running variable column. + epsilon : float, default 0.001 + A small scalar for evaluating the causal impact above/below the kink. 
+ bandwidth : float, default np.inf + Data outside of the bandwidth (relative to the kink) is not used to + fit the model. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. """ supports_ols = False @@ -163,7 +165,7 @@ def algorithm(self) -> None: ) def input_validation(self) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness.""" if "treated" not in self.formula: raise FormulaException( "A predictor called `treated` should be in the formula" @@ -229,8 +231,11 @@ def _is_treated(self, x: np.ndarray | pd.Series) -> np.ndarray: def summary(self, round_to: int | None = 2) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. """ print( f""" @@ -389,6 +394,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/staggered_did.py b/causalpy/experiments/staggered_did.py index 2bfb0bab0..c66a79bc7 100644 --- a/causalpy/experiments/staggered_did.py +++ b/causalpy/experiments/staggered_did.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Staggered Difference in Differences (Imputation-based) +"""Staggered Difference in Differences (Imputation-based). 
This module implements the imputation-based staggered DiD estimator, following the approach of Borusyak, Jaravel, and Spiess (2024). It handles settings where @@ -86,6 +85,8 @@ class StaggeredDifferenceInDifferences(BaseExperiment): reference_event_time : int, optional Event-time index associated with plots (reserved for future use). Defaults to -1. + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. Attributes ---------- @@ -1039,6 +1040,9 @@ def effect_summary( Significance level for HDI/CI intervals (1-alpha confidence level). min_effect : float, optional Region of Practical Equivalence (ROPE) threshold (PyMC only, ignored for OLS). + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/experiments/synthetic_control.py b/causalpy/experiments/synthetic_control.py index 7ccc57b57..2937b9520 100644 --- a/causalpy/experiments/synthetic_control.py +++ b/causalpy/experiments/synthetic_control.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Synthetic Control Experiment -""" +"""Synthetic Control Experiment.""" import warnings from typing import Any, Literal @@ -39,23 +37,27 @@ class SyntheticControl(BaseExperiment): """The class for the synthetic control experiment. - :param data: - A pandas dataframe - :param treatment_time: - The time when treatment occurred, should be in reference to the data index - :param control_units: - A list of control units to be used in the experiment - :param treated_units: - A list of treated units to be used in the experiment - :param model: - A PyMC or sklearn model. Defaults to WeightedSumFitter. - :param min_donor_correlation: + Parameters + ---------- + data : pd.DataFrame + A pandas dataframe. 
+ treatment_time : int, float, or pd.Timestamp + The time when treatment occurred, in reference to the data index. + control_units : list of str + A list of control units to be used in the experiment. + treated_units : list of str + A list of treated units to be used in the experiment. + model : PyMCModel, RegressorMixin, or None, default None + A PyMC or sklearn model. Defaults to :class:`WeightedSumFitter`. + min_donor_correlation : float, default 0.0 Minimum acceptable Pearson correlation between each control unit and treated unit in the pre-treatment period. Control units below this threshold trigger a ``UserWarning``. Defaults to ``0.0`` (warn on negatively correlated donors). + **kwargs + Additional keyword arguments forwarded to :class:`BaseExperiment`. - Example + Examples -------- >>> import causalpy as cp >>> df = cp.load_data("sc") @@ -311,7 +313,15 @@ def algorithm(self) -> None: def input_validation( self, data: pd.DataFrame, treatment_time: int | float | pd.Timestamp ) -> None: - """Validate the input data and model formula for correctness""" + """Validate the input data and model formula for correctness. + + Parameters + ---------- + data : pd.DataFrame + The experiment data. + treatment_time : int, float, or pd.Timestamp + The treatment time, expected to be compatible with ``data.index``. + """ if isinstance(data.index, pd.DatetimeIndex) and not isinstance( treatment_time, pd.Timestamp ): @@ -352,8 +362,11 @@ def _pre_treatment_correlations(self) -> dict[str, float]: def summary(self, round_to: int | None = None) -> None: """Print summary of main results and model coefficients. - :param round_to: - Number of decimals used to round results. Defaults to 2. Use "None" to return raw numbers + Parameters + ---------- + round_to : int, optional + Number of decimals used to round results. Defaults to 2. Use + ``None`` to return raw numbers. 
""" print(f"{self.expt_type:=^80}") print(f"Control units: {self.control_units}") @@ -759,11 +772,14 @@ def get_plot_data_bayesian( """ Recover the data of the PrePostFit experiment along with the prediction and causal impact information. - :param hdi_prob: - Prob for which the highest density interval will be computed. The default value is defined as the default from the :func:`arviz.hdi` function. - :param treated_unit: - Which treated unit to extract data for. Must be a string name of the treated unit. - If None, uses the first treated unit. + Parameters + ---------- + hdi_prob : float, default :data:`~causalpy.constants.HDI_PROB` + Probability mass of the highest density interval. Defaults to + the project-wide :data:`~causalpy.constants.HDI_PROB`. + treated_unit : str, optional + Which treated unit to extract data for. Must be a string name + of the treated unit. If ``None``, uses the first treated unit. """ if not isinstance(self.model, PyMCModel): raise ValueError("Unsupported model type") @@ -907,6 +923,9 @@ def effect_summary( Ignored for Synthetic Control (two-period design only). prefix : str, optional Prefix for prose generation. Defaults to "Post-period". + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- diff --git a/causalpy/maketables_adapters.py b/causalpy/maketables_adapters.py index 33bf3b593..b05837c53 100644 --- a/causalpy/maketables_adapters.py +++ b/causalpy/maketables_adapters.py @@ -36,23 +36,55 @@ class MaketablesAdapter(Protocol): """Protocol for backend-specific maketables extraction.""" def coef_table(self, experiment: Any) -> pd.DataFrame: - """Return canonical coefficient table for maketables.""" + """Return canonical coefficient table for maketables. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... 
def stat(self, experiment: Any, key: str) -> Any: - """Return a single model-level statistic by key.""" + """Return a single model-level statistic by key. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + key : str + Statistic identifier (see :meth:`default_stat_keys`). + """ ... def vcov_info(self, experiment: Any) -> dict[str, Any]: - """Return variance-covariance metadata dict.""" + """Return variance-covariance metadata dict. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... def stat_labels(self, experiment: Any) -> dict[str, str] | None: - """Return display labels for statistics.""" + """Return display labels for statistics. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... def default_stat_keys(self, experiment: Any) -> list[str] | None: - """Return ordered list of default statistic keys.""" + """Return ordered list of default statistic keys. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment. + """ ... @@ -218,7 +250,13 @@ class PyMCMaketablesAdapter: """Adapter for experiments backed by PyMCModel.""" def coef_table(self, experiment: Any) -> pd.DataFrame: - """Build coefficient table from PyMC posterior draws with HDI intervals.""" + """Build coefficient table from PyMC posterior draws with HDI intervals. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ labels = list(getattr(experiment, "labels", [])) if not labels: msg = "Experiment has no coefficient labels for maketables export." @@ -242,7 +280,15 @@ def coef_table(self, experiment: Any) -> pd.DataFrame: ) def stat(self, experiment: Any, key: str) -> Any: - """Return a single Bayesian model-level statistic by key.""" + """Return a single Bayesian model-level statistic by key. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + key : str + Statistic identifier. 
+ """ stats: dict[str, Any] = { "N": _safe_observation_count(experiment), "r2": _safe_r2_value(experiment), @@ -253,15 +299,33 @@ def stat(self, experiment: Any, key: str) -> Any: return stats.get(key) def vcov_info(self, experiment: Any) -> dict[str, Any]: - """Return Bayesian posterior variance-covariance metadata.""" + """Return Bayesian posterior variance-covariance metadata. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ return {"se_type": "Bayesian posterior", "vcov": None} def stat_labels(self, experiment: Any) -> dict[str, str] | None: - """Return display labels for Bayesian model statistics.""" + """Return display labels for Bayesian model statistics. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ return {"N": "N", "r2": "Bayesian R2", "se_type": "SE type"} def default_stat_keys(self, experiment: Any) -> list[str] | None: - """Return ordered list of default statistic keys for Bayesian models.""" + """Return ordered list of default statistic keys for Bayesian models. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with a PyMC model. + """ keys = ["N"] if _safe_r2_value(experiment) is not None: keys.append("r2") @@ -272,7 +336,13 @@ class SklearnMaketablesAdapter: """Adapter for experiments backed by sklearn RegressorMixin.""" def coef_table(self, experiment: Any) -> pd.DataFrame: - """Build coefficient table from sklearn model coefficients.""" + """Build coefficient table from sklearn model coefficients. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ labels = list(getattr(experiment, "labels", [])) if not labels: msg = "Experiment has no coefficient labels for maketables export." 
@@ -291,7 +361,15 @@ def coef_table(self, experiment: Any) -> pd.DataFrame: return _canonical_frame(labels=labels, b=coeffs, se=nans, p=nans) def stat(self, experiment: Any, key: str) -> Any: - """Return a single OLS model-level statistic by key.""" + """Return a single OLS model-level statistic by key. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + key : str + Statistic identifier. + """ stats: dict[str, Any] = { "N": _safe_observation_count(experiment), "r2": _safe_r2_value(experiment), @@ -302,15 +380,33 @@ def stat(self, experiment: Any, key: str) -> Any: return stats.get(key) def vcov_info(self, experiment: Any) -> dict[str, Any]: - """Return OLS variance-covariance metadata.""" + """Return OLS variance-covariance metadata. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ return {"se_type": "Not available", "vcov": None} def stat_labels(self, experiment: Any) -> dict[str, str] | None: - """Return display labels for OLS model statistics.""" + """Return display labels for OLS model statistics. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ return {"N": "N", "r2": "R2", "se_type": "SE type"} def default_stat_keys(self, experiment: Any) -> list[str] | None: - """Return ordered list of default statistic keys for OLS models.""" + """Return ordered list of default statistic keys for OLS models. + + Parameters + ---------- + experiment : Any + Fitted CausalPy experiment with an sklearn model. + """ keys = ["N"] if _safe_r2_value(experiment) is not None: keys.append("r2") @@ -318,7 +414,13 @@ def default_stat_keys(self, experiment: Any) -> list[str] | None: def get_maketables_adapter(model: Any) -> MaketablesAdapter: - """Return the adapter for a model backend.""" + """Return the adapter for a model backend. + + Parameters + ---------- + model : Any + A PyMC or sklearn model instance. 
+ """ if isinstance(model, PyMCModel): return PyMCMaketablesAdapter() if isinstance(model, RegressorMixin): diff --git a/causalpy/pipeline.py b/causalpy/pipeline.py index 0ba0f2f7b..bc0bb499e 100644 --- a/causalpy/pipeline.py +++ b/causalpy/pipeline.py @@ -87,7 +87,19 @@ class PipelineResult: @classmethod def from_context(cls, context: PipelineContext) -> PipelineResult: - """Build a ``PipelineResult`` from a completed ``PipelineContext``.""" + """Build a ``PipelineResult`` from a completed ``PipelineContext``. + + Parameters + ---------- + context : PipelineContext + Completed pipeline context to extract user-facing results from. + + Returns + ------- + PipelineResult + Snapshot containing the experiment, effect summary, sensitivity + results, and report. + """ return cls( experiment=context.experiment, effect_summary=context.effect_summary, @@ -109,11 +121,28 @@ class Step(Protocol): """ def validate(self, context: PipelineContext) -> None: - """Check configuration before execution.""" + """Check configuration before execution. + + Parameters + ---------- + context : PipelineContext + Shared pipeline context. + """ ... def run(self, context: PipelineContext) -> PipelineContext: - """Execute the step, mutating and returning the context.""" + """Execute the step, mutating and returning the context. + + Parameters + ---------- + context : PipelineContext + Shared pipeline context, which the step is allowed to mutate. + + Returns + ------- + PipelineContext + The same context, returned for chaining convenience. + """ ... diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index 605c1a41d..3e27291bb 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Custom PyMC models for causal inference""" +"""Custom PyMC models for causal inference.""" import inspect import warnings @@ -75,6 +75,16 @@ class PyMCModel(pm.Model): methods like `fit`, `predict`, and `score`. It also provides other methods which are useful for causal inference. + Parameters + ---------- + sample_kwargs : dict, optional + Dictionary of kwargs that get unpacked and passed to the + :func:`pymc.sample` function. Defaults to an empty dictionary if + ``None``. + priors : dict, optional + Dictionary of priors for the model. Defaults to ``None``, in which + case default priors are used. + Example ------- >>> import causalpy as cp @@ -163,6 +173,11 @@ def priors_from_data(self, X, y) -> dict[str, Any]: Dictionary mapping parameter names to Prior objects. The keys should match parameter names used in the model's `build_model()` method. + See Also + -------- + WeightedSumFitter.priors_from_data : Example implementation that sets + Dirichlet prior shape based on number of control units. + Notes ----- The base implementation returns an empty dictionary, meaning no @@ -201,11 +216,6 @@ def priors_from_data(self, X, y) -> dict[str, Any]: ... dims=["treated_units", "coeffs"], ... ) ... } - - See Also - -------- - WeightedSumFitter.priors_from_data : Example implementation that sets - Dirichlet prior shape based on number of control units. """ return {} @@ -247,6 +257,26 @@ def _clone(self) -> "PyMCModel": def build_model( self, X: xr.DataArray, y: xr.DataArray, coords: dict[str, Any] | None ) -> None: + """Construct the PyMC model graph. + + Subclasses must override this method to declare priors, deterministic + nodes, and the likelihood for the model. + + Parameters + ---------- + X : xarray.DataArray + Input features with dimensions ``["obs_ind", "coeffs"]``. + y : xarray.DataArray + Target variable with dimensions ``["obs_ind", "treated_units"]``. 
+ coords : dict or None + Mapping of named dimensions to coordinate labels for the + underlying ``pm.Model``. + + Raises + ------ + NotImplementedError + Always, when called on the base class. + """ raise NotImplementedError( "This method must be implemented by a subclass" ) # pragma: no cover @@ -334,10 +364,23 @@ def predict( **kwargs, ): """ - Predict data given input data `X` + Predict data given input data `X`. .. caution:: Results in KeyError if model hasn't been fit. + + Parameters + ---------- + X : xr.DataArray + Input features for which predictions are required. + coords : dict, optional + Coordinate names for named dimensions. Forwarded to subclass + ``_data_setter`` overrides; ignored by the base implementation. + out_of_sample : bool, optional + Marker for out-of-sample prediction. Reserved for subclasses; + the base implementation does not act on it. + **kwargs + Reserved for subclass extensions. """ # Ensure random_seed is used in sample_prior_predictive() and @@ -376,6 +419,17 @@ def score(self, X, y, coords: dict[str, Any] | None = None, **kwargs) -> pd.Seri The Bayesian :math:`R^2` is not the same as the traditional coefficient of determination, https://en.wikipedia.org/wiki/Coefficient_of_determination. + Parameters + ---------- + X : xr.DataArray + Input features. + y : xr.DataArray + Observed targets to score against the posterior predictive mean. + coords : dict, optional + Coordinate names for named dimensions. Forwarded to + :meth:`predict`; ignored by the base implementation. + **kwargs + Reserved for subclass extensions. """ mu = self.predict(X) mu_data = az.extract(mu, group="posterior_predictive", var_names="mu") @@ -449,6 +503,19 @@ def calculate_impact( return impact.transpose(..., "obs_ind") def calculate_cumulative_impact(self, impact: xr.DataArray) -> xr.DataArray: + """Cumulative sum of pointwise causal impact along ``obs_ind``. 
+ + Parameters + ---------- + impact : xarray.DataArray + Pointwise causal impact, typically the output of + :meth:`calculate_impact`. + + Returns + ------- + xarray.DataArray + Cumulative impact accumulated along the ``obs_ind`` dimension. + """ return impact.cumsum(dim="obs_ind") def print_coefficients( @@ -467,30 +534,30 @@ def print_coefficients( if self.idata is None: raise RuntimeError("Model has not been fit") - def print_row( + def _print_row( max_label_length: int, name: str, coeff_samples: xr.DataArray, round_to: int ) -> None: - """Print one row of the coefficient table""" + """Print one row of the coefficient table.""" formatted_name = f" {name: <{max_label_length}}" formatted_val = f"{round_num(coeff_samples.mean().data, round_to)}, {HDI_PROB * 100:.0f}% HDI [{round_num(coeff_samples.quantile((1 - HDI_PROB) / 2).data, round_to)}, {round_num(coeff_samples.quantile(1 - (1 - HDI_PROB) / 2).data, round_to)}]" # noqa: E501 print(f" {formatted_name} {formatted_val}") - def print_coefficients_for_unit( + def _print_coefficients_for_unit( unit_coeffs: xr.DataArray, unit_sigma: xr.DataArray, labels: list, round_to: int, ) -> None: - """Print coefficients for a single unit""" + """Print coefficients for a single unit.""" # Determine the width of the longest label max_label_length = max(len(name) for name in labels + ["y_hat_sigma"]) for name in labels: coeff_samples = unit_coeffs.sel(coeffs=name) - print_row(max_label_length, name, coeff_samples, round_to) + _print_row(max_label_length, name, coeff_samples, round_to) # Add coefficient for measurement std - print_row(max_label_length, "y_hat_sigma", unit_sigma, round_to) + _print_row(max_label_length, "y_hat_sigma", unit_sigma, round_to) print("Model coefficients:") coeffs = az.extract(self.idata.posterior, var_names="beta") @@ -515,7 +582,7 @@ def print_coefficients_for_unit( unit_sigma = az.extract(self.idata.posterior, var_names=sigma_var_name).sel( treated_units=unit ) - 
print_coefficients_for_unit(unit_coeffs, unit_sigma, labels, round_to or 2) + _print_coefficients_for_unit(unit_coeffs, unit_sigma, labels, round_to or 2) class LinearRegression(PyMCModel): @@ -568,7 +635,16 @@ def build_model( self, X: xr.DataArray, y: xr.DataArray, coords: dict[str, Any] | None ) -> None: """ - Defines the PyMC model + Define the PyMC model. + + Parameters + ---------- + X : xr.DataArray + Design matrix with dims ``("obs_ind", "coeffs")``. + y : xr.DataArray + Outcome with dims ``("obs_ind", "treated_units")``. + coords : dict or None + Coordinate names for the model's named dimensions. """ with self: # Ensure treated_units coordinate exists for consistency @@ -668,7 +744,16 @@ def build_model( self, X: xr.DataArray, y: xr.DataArray, coords: dict[str, Any] | None ) -> None: """ - Defines the PyMC model + Define the PyMC model. + + Parameters + ---------- + X : xr.DataArray + Design matrix with dims ``("obs_ind", "coeffs")``. + y : xr.DataArray + Outcome with dims ``("obs_ind", "treated_units")``. + coords : dict or None + Coordinate names for the model's named dimensions. """ with self: self.add_coords(coords) @@ -866,6 +951,15 @@ def build_model( ) -> None: """ Build the PyMC model with softmax-parameterized simplex weights. + + Parameters + ---------- + X : xr.DataArray + Design matrix with dims ``("obs_ind", "coeffs")``. + y : xr.DataArray + Outcome with dims ``("obs_ind", "treated_units")``. + coords : dict or None + Coordinate names for the model's named dimensions. """ if not coords or "coeffs" not in coords: raise ValueError( @@ -895,7 +989,7 @@ def build_model( class InstrumentalVariableRegression(PyMCModel): - """Custom PyMC model for instrumental linear regression + """Custom PyMC model for instrumental linear regression. Example -------- @@ -973,13 +1067,15 @@ def build_model( # type: ignore Dictionary of priors for the mus and sigmas of both regressions. Example: ``priors = {"mus": [0, 0], "sigmas": [1, 1], "eta": 2, "lkj_sd": 2}``. 
- vs_prior_type: An optional string. Can be "spike_and_slab" - or "horseshoe" or "normal - vs_hyperparams: An optional dictionary of priors for the - variable selection hyperparameters - binary_treatment: A flag for determining the relevant - likelihood to be used. - + vs_prior_type : {"spike_and_slab", "horseshoe", "normal"}, optional + Optional variable-selection prior type. ``None`` falls back to + standard normal priors. + vs_hyperparams : dict, optional + Hyperparameters for the variable-selection prior. Only consulted + when ``vs_prior_type`` is set. + binary_treatment : bool, default False + Whether the treatment ``t`` is binary; selects the relevant + likelihood term. """ # --- Priors --- @@ -1117,7 +1213,15 @@ def sample_predictive_distribution(self, ppc_sampler: str | None = "jax") -> Non using the JAX sampler compilation method. If using the JAX sampler it will sample only the posterior predictive distribution. If using the PYMC sampler if will sample both the prior - and posterior predictive distributions.""" + and posterior predictive distributions. + + Parameters + ---------- + ppc_sampler : {"jax", "pymc"}, optional + Backend used for posterior predictive sampling. ``"jax"`` (the + default) is much faster for the multivariate Normal likelihood; + ``"pymc"`` additionally samples the prior predictive. + """ random_seed = self.sample_kwargs.get("random_seed", None) if ppc_sampler == "jax": @@ -1160,6 +1264,29 @@ def fit( # type: ignore[override] We default to None, so the user can determine if they wish to spend time sampling the posterior predictive distribution independently. + + Parameters + ---------- + X : np.ndarray + Array used to predict the outcome ``y``. + Z : np.ndarray + Array used to predict the treatment variable ``t``. + y : np.ndarray + Focal outcome. + t : np.ndarray + Treatment whose causal impact is being estimated. + coords : dict + Coordinate names for the instruments and covariates. 
+ priors : dict + Prior specification dictionary forwarded to :meth:`build_model`. + ppc_sampler : {"jax", "pymc"}, optional + Backend for posterior predictive sampling. ``None`` skips it. + vs_prior_type : {"spike_and_slab", "horseshoe", "normal"}, optional + Variable-selection prior type, forwarded to :meth:`build_model`. + vs_hyperparams : dict, optional + Hyperparameters for the variable-selection prior. + binary_treatment : bool, default False + Whether the treatment ``t`` is binary. """ # Ensure random_seed is used in sample_prior_predictive() and @@ -1176,8 +1303,7 @@ def fit( # type: ignore[override] class PropensityScore(PyMCModel): - r""" - Custom PyMC model for inverse propensity score models + r"""Custom PyMC model for inverse propensity score models. .. note: Generally, the `.fit()` method should be used rather than @@ -1222,7 +1348,23 @@ def build_model( # type: ignore prior: dict[str, Any] | None = None, noncentred: bool = True, ) -> None: - "Defines the PyMC propensity model" + """Define the PyMC propensity model. + + Parameters + ---------- + X : np.ndarray + Covariate matrix used to predict the treatment. + t : np.ndarray + Observed treatment indicator (0/1). + coords : dict + Coordinate names for named dimensions of the model. + prior : dict, optional + Prior specification overrides; see :attr:`default_priors` for + the expected keys. + noncentred : bool, default True + Reserved for future non-centred parameterisations of the + coefficient prior. Currently informational only. + """ with self: self.add_coords(coords) X_data = pm.Data("X", X, dims=["obs_ind", "coeffs"]) @@ -1243,6 +1385,19 @@ def fit( # type: ignore """Draw samples from posterior, prior predictive, and posterior predictive distributions. We overwrite the base method because the base method assumes a variable y and we use t to indicate the treatment variable here. + + Parameters + ---------- + X : np.ndarray + Covariate matrix used to predict the treatment. 
+ t : np.ndarray + Observed treatment indicator (0/1). + coords : dict + Coordinate names for named dimensions of the model. + prior : dict, optional + Prior specification overrides. Defaults to ``{"b": [0, 1]}``. + noncentred : bool, default True + Forwarded to :meth:`build_model`. """ if prior is None: prior = {"b": [0, 1]} @@ -1314,8 +1469,8 @@ def fit_outcome_model( If we wish to winsorize the propensity score this can be set to clip the high and low values of the propensity at 0 + winsorize_boundary and 1-winsorize_boundary - spline_knots: int, default 30 - The number of knots we use in the 0 - 1 interval to create our spline function + spline_knots : int, default 30 + The number of knots we use in the 0 - 1 interval to create our spline function. Returns ------- @@ -1855,6 +2010,9 @@ def predict( Not used, kept for API compatibility. out_of_sample : bool, optional Not used, kept for API compatibility. + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- @@ -1896,6 +2054,8 @@ def score( Target variable with dims ["obs_ind", "treated_units"]. coords : dict, optional Not used, kept for API compatibility. + **kwargs + Forwarded to :meth:`PyMCModel.score`. Returns ------- @@ -2228,6 +2388,9 @@ def predict( Not used directly, datetime extracted from X coordinates. out_of_sample : bool, optional If True, forecast future values. If False, return in-sample predictions. + **kwargs + Reserved for forward-compatibility; not consumed by this + implementation. Returns ------- @@ -2299,6 +2462,8 @@ def score( Target variable with dims ["obs_ind", "treated_units"]. coords : dict, optional Not used, kept for API compatibility. + **kwargs + Forwarded to :meth:`PyMCModel.score`. 
Returns ------- diff --git a/causalpy/skl_models.py b/causalpy/skl_models.py index 7a144e019..b99ee5e3e 100644 --- a/causalpy/skl_models.py +++ b/causalpy/skl_models.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Custom scikit-learn models for causal inference""" +"""Custom scikit-learn models for causal inference.""" from functools import partial @@ -29,11 +29,25 @@ class ScikitLearnAdaptor: coef_: np.ndarray def calculate_impact(self, y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: - """Calculate the causal impact of the intervention.""" + """Calculate the causal impact of the intervention. + + Parameters + ---------- + y_true : np.ndarray + Observed outcomes. + y_pred : np.ndarray + Counterfactual predictions from the fitted model. + """ return y_true - y_pred def calculate_cumulative_impact(self, impact: np.ndarray) -> np.ndarray: - """Calculate the cumulative impact intervention.""" + """Calculate the cumulative impact intervention. + + Parameters + ---------- + impact : np.ndarray + Per-period impact estimates. + """ return np.cumsum(impact) def print_coefficients( @@ -69,15 +83,35 @@ def get_coeffs(self) -> np.ndarray: class WeightedProportion(ScikitLearnAdaptor, LinearModel, RegressorMixin): - """Weighted proportion model for causal inference. Used for synthetic control - methods for example""" + """Weighted proportion model for causal inference. + + Used for synthetic control methods, for example. + """ def loss(self, W: np.ndarray, X: np.ndarray, y: np.ndarray) -> float: - """Compute root mean squared loss with data X, weights W, and predictor y""" + """Compute root mean squared loss with data X, weights W, and predictor y. + + Parameters + ---------- + W : np.ndarray + Convex combination weights. + X : np.ndarray + Donor matrix. 
+ y : np.ndarray + Treated unit outcomes in the pre-treatment period. + """ return np.sqrt(np.mean((y - np.dot(X, W.T)) ** 2)) def fit(self, X: np.ndarray, y: np.ndarray) -> "WeightedProportion": - """Fit model on data X with predictor y""" + """Fit model on data X with predictor y. + + Parameters + ---------- + X : np.ndarray + Donor matrix. + y : np.ndarray + Treated unit outcomes in the pre-treatment period. + """ w_start = [1 / X.shape[1]] * X.shape[1] coef_ = fmin_slsqp( partial(self.loss, X=X, y=y), @@ -91,7 +125,13 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "WeightedProportion": return self def predict(self, X: np.ndarray) -> np.ndarray: - """Predict results for data X""" + """Predict results for data X. + + Parameters + ---------- + X : np.ndarray + Donor matrix to predict from. + """ return np.dot(X, self.coef_.T) @@ -99,7 +139,13 @@ def create_causalpy_compatible_class( estimator: type[RegressorMixin], ) -> type[RegressorMixin]: """This function takes a scikit-learn estimator and returns a new class that is - compatible with CausalPy.""" + compatible with CausalPy. + + Parameters + ---------- + estimator : type[RegressorMixin] + A scikit-learn estimator class to augment. + """ _add_mixin_methods(estimator, ScikitLearnAdaptor) return estimator diff --git a/causalpy/steps/estimate_effect.py b/causalpy/steps/estimate_effect.py index b83a83bcb..b0565c5af 100644 --- a/causalpy/steps/estimate_effect.py +++ b/causalpy/steps/estimate_effect.py @@ -63,6 +63,12 @@ def __init__(self, method: type[BaseExperiment], **kwargs: Any) -> None: def validate(self, context: PipelineContext) -> None: """Check that the step is properly configured. + Parameters + ---------- + context : PipelineContext + Pipeline context (unused at validation time but required by the + pipeline step interface). 
+ Raises ------ TypeError @@ -87,6 +93,12 @@ def run(self, context: PipelineContext) -> PipelineContext: The experiment constructor receives ``context.data`` as its first positional argument, followed by all captured keyword arguments. + Parameters + ---------- + context : PipelineContext + Pipeline context. ``context.data`` is forwarded to the experiment + constructor as the first positional argument. + Returns ------- PipelineContext diff --git a/causalpy/steps/report.py b/causalpy/steps/report.py index 770986cd8..3c01a2c73 100644 --- a/causalpy/steps/report.py +++ b/causalpy/steps/report.py @@ -71,7 +71,13 @@ def __init__( def validate(self, context: PipelineContext) -> None: """GenerateReport has no strict prerequisites; it gracefully handles - missing data.""" + missing data. + + Parameters + ---------- + context : PipelineContext + Pipeline context (unused; required by the step interface). + """ def _render_plot(self, experiment: Any) -> list[str]: """Render experiment plots as base64-encoded PNG strings.""" @@ -90,7 +96,19 @@ def _render_plot(self, experiment: Any) -> list[str]: return plots def run(self, context: PipelineContext) -> PipelineContext: - """Generate the HTML report and store it in the context.""" + """Generate the HTML report and store it in the context. + + Parameters + ---------- + context : PipelineContext + Pipeline context providing ``experiment``, ``effect_summary``, + and ``sensitivity_results`` (any of which may be ``None``). + + Returns + ------- + PipelineContext + The same context with ``report`` populated. + """ env = Environment( loader=FileSystemLoader(str(_TEMPLATE_DIR)), autoescape=True, diff --git a/causalpy/steps/sensitivity.py b/causalpy/steps/sensitivity.py index 644d33aba..5a65bc92f 100644 --- a/causalpy/steps/sensitivity.py +++ b/causalpy/steps/sensitivity.py @@ -43,6 +43,14 @@ def register_default_check( Called by check modules at import time so that ``SensitivityAnalysis.default_for`` can auto-select checks. 
+ + Parameters + ---------- + check_class : type + The check class to register as a default. + experiment_types : set of type[BaseExperiment] + Experiment classes for which ``check_class`` should be applied by + default. """ for exp_type in experiment_types: _DEFAULT_CHECKS.setdefault(exp_type, []).append(check_class) @@ -69,7 +77,18 @@ class SensitivitySummary: @classmethod def from_results(cls, results: list[CheckResult]) -> SensitivitySummary: - """Build a summary from a list of check results.""" + """Build a summary from a list of check results. + + Parameters + ---------- + results : list of CheckResult + Individual results to aggregate. + + Returns + ------- + SensitivitySummary + Aggregated summary covering all supplied results. + """ verdicts = [r.passed for r in results if r.passed is not None] all_passed = all(verdicts) if verdicts else None @@ -127,6 +146,12 @@ def validate(self, context: PipelineContext) -> None: only check structural issues (e.g. that each object satisfies the Check protocol). + Parameters + ---------- + context : PipelineContext + Pipeline context (unused at validation time but required by the + pipeline step interface). + Raises ------ TypeError @@ -142,6 +167,18 @@ def validate(self, context: PipelineContext) -> None: def run(self, context: PipelineContext) -> PipelineContext: """Run all checks against the fitted experiment. + Parameters + ---------- + context : PipelineContext + Pipeline context containing the fitted experiment and any + ``experiment_config`` required by the checks. + + Returns + ------- + PipelineContext + The same context with ``sensitivity_results`` and ``report`` + populated. + Raises ------ RuntimeError diff --git a/causalpy/transforms.py b/causalpy/transforms.py index 2b8ff2245..42792d85e 100644 --- a/causalpy/transforms.py +++ b/causalpy/transforms.py @@ -53,13 +53,11 @@ class StepTransform: from the training data, ensuring consistent behavior when predicting on new data. 
- Parameters - ---------- - x : array-like - Time values (numeric or datetime) - threshold : numeric, str, or pd.Timestamp - The intervention time. For datetime x, can be a string like - '2020-01-01' which will be parsed as pd.Timestamp. + Notes + ----- + Per the patsy stateful transform protocol, ``x`` and ``threshold`` are + supplied to :meth:`memorize_chunk` and :meth:`transform` rather than to + the constructor; see those methods for parameter details. Examples -------- @@ -88,7 +86,17 @@ def _is_datetime_like(self, x: Any) -> bool: def memorize_chunk( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> None: - """Called during first pass - detect datetime and store origin.""" + """ + Detect datetime and store origin during patsy's first pass. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time. For datetime ``x`` it may be a string + like ``'2020-01-01'`` or a :class:`pd.Timestamp`. + """ if self._is_datetime_like(x): self._is_datetime = True x_dt = pd.to_datetime(x) @@ -106,7 +114,21 @@ def memorize_finish(self) -> None: def transform( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> np.ndarray: - """Transform x into step function values.""" + """ + Transform ``x`` into step function values. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time, in the same domain as ``x``. + + Returns + ------- + np.ndarray + Binary indicator with 1 where ``x >= threshold`` and 0 elsewhere. + """ if self._is_datetime and self._origin is not None: # Convert x to days from origin x_dt = pd.to_datetime(x) @@ -151,12 +173,14 @@ class RampTransform: the threshold can be specified as a string ('2020-01-01') or pd.Timestamp. 
- Parameters - ---------- - x : array-like - Time values (numeric or datetime) - threshold : numeric, str, or pd.Timestamp - The intervention time. + Notes + ----- + Per the patsy stateful transform protocol, ``x`` and ``threshold`` are + supplied to :meth:`memorize_chunk` and :meth:`transform` rather than to + the constructor; see those methods for parameter details. + + For datetime inputs, the ramp values represent days since the threshold. + This means the slope coefficient will be interpreted as "change per day". Examples -------- @@ -165,11 +189,6 @@ class RampTransform: >>> # Datetime time - ramp is in DAYS >>> formula = "y ~ 1 + date + ramp(date, '2020-06-01')" - - Notes - ----- - For datetime inputs, the ramp values represent days since the threshold. - This means the slope coefficient will be interpreted as "change per day". """ def __init__(self) -> None: @@ -187,7 +206,17 @@ def _is_datetime_like(self, x: Any) -> bool: def memorize_chunk( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> None: - """Called during first pass - detect datetime and store origin.""" + """ + Detect datetime and store origin during patsy's first pass. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time. For datetime ``x`` it may be a string + like ``'2020-01-01'`` or a :class:`pd.Timestamp`. + """ if self._is_datetime_like(x): self._is_datetime = True x_dt = pd.to_datetime(x) @@ -204,7 +233,22 @@ def memorize_finish(self) -> None: def transform( self, x: Any, threshold: int | float | str | pd.Timestamp ) -> np.ndarray: - """Transform x into ramp function values.""" + """ + Transform ``x`` into ramp function values. + + Parameters + ---------- + x : array-like + Time values (numeric or datetime). + threshold : int, float, str, or pd.Timestamp + The intervention time, in the same domain as ``x``. 
+ + Returns + ------- + np.ndarray + Ramp values ``max(0, x - threshold)``. For datetime inputs, the + difference is expressed in days. + """ if self._is_datetime and self._origin is not None: # Convert x to days from origin x_dt = pd.to_datetime(x) diff --git a/causalpy/utils.py b/causalpy/utils.py index 39b886d66..6eeeb4d84 100644 --- a/causalpy/utils.py +++ b/causalpy/utils.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Utility functions -""" +"""Utility functions.""" from __future__ import annotations @@ -382,11 +380,6 @@ def extract_lift_for_mmm( If the model is not a Bayesian (PyMC) model, as uncertainty quantification requires posterior samples. - See Also - -------- - PyMC-Marketing lift test calibration : - https://www.pymc-marketing.io/en/stable/notebooks/mmm/mmm_lift_test.html - Notes ----- This function is designed for integration with PyMC-Marketing's MMM calibration @@ -395,7 +388,9 @@ def extract_lift_for_mmm( with experimental evidence. For more information on lift test calibration in MMMs, see the PyMC-Marketing - documentation: https://github.com/pymc-labs/pymc-marketing + documentation: https://github.com/pymc-labs/pymc-marketing. 
+ Reference workflow: + https://www.pymc-marketing.io/en/stable/notebooks/mmm/mmm_lift_test.html Examples -------- diff --git a/causalpy/variable_selection_priors.py b/causalpy/variable_selection_priors.py index 5cc304847..b4be7f793 100644 --- a/causalpy/variable_selection_priors.py +++ b/causalpy/variable_selection_priors.py @@ -84,8 +84,8 @@ class SpikeAndSlabPrior: dims : str or tuple, optional Dimension names for the coefficient vector - Example - ------- + Examples + -------- >>> import pymc as pm >>> from causalpy.variable_selection_priors import SpikeAndSlabPrior >>> spike_slab = SpikeAndSlabPrior(dims="features") @@ -164,8 +164,8 @@ class HorseshoePrior: dims : str or tuple, optional Dimension names for the coefficient vector - Example - ------- + Examples + -------- >>> import pymc as pm >>> from causalpy.variable_selection_priors import HorseshoePrior >>> horseshoe = HorseshoePrior(dims="features") @@ -269,8 +269,8 @@ class VariableSelectionPrior: - mu: float or array (default=0) - Prior mean - sigma: float or array (default=1) - Prior SD - Example - ------- + Examples + -------- >>> import pymc as pm >>> from causalpy.variable_selection_priors import VariableSelectionPrior >>> # Create spike-and-slab prior @@ -379,8 +379,8 @@ def create_prior( PyMC variable The coefficient vector with the specified prior - Example - ------- + Examples + -------- >>> import pymc as pm >>> import pandas as pd >>> from causalpy.variable_selection_priors import VariableSelectionPrior diff --git a/causalpy/version.py b/causalpy/version.py index e721edb61..ca088be3d 100644 --- a/causalpy/version.py +++ b/causalpy/version.py @@ -11,6 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""CausalPy Version""" +"""CausalPy Version.""" __version__ = "0.8.0" diff --git a/environment.yml b/environment.yml index 638cab298..cfe114d7d 100644 --- a/environment.yml +++ b/environment.yml @@ -15,7 +15,6 @@ dependencies: - arviz<1.0,>=0.14.0 - codespell - graphviz - - interrogate - ipython!=8.7.0 - jinja2 - make diff --git a/pyproject.toml b/pyproject.toml index d24553b17..c2dcedd57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,6 @@ dependencies = [ dev = [ "prek", "twine", - "interrogate", "codespell", "nbformat", "nbconvert", @@ -96,7 +95,7 @@ docs = [ "sphinx-design", "sphinx-togglebutton", ] -lint = ["interrogate", "prek", "ruff", "mypy"] +lint = ["prek", "ruff", "mypy"] test = ["pytest", "pytest-cov", "codespell", "nbformat", "nbconvert", "papermill"] [tool.pyproject2conda] @@ -124,27 +123,6 @@ markers = [ "slow: mark test as slow.", ] -[tool.interrogate] -ignore-init-method = true -ignore-init-module = true -ignore-magic = false -ignore-semiprivate = false -ignore-private = false -ignore-property-decorators = false -ignore-module = false -ignore-nested-functions = false -ignore-nested-classes = true -ignore-setters = false -fail-under = 85 -exclude = ["setup.py", "docs", "build", "dist"] -ignore-regex = ["^get$", "^mock_.*", ".*BaseClass.*"] -# possible values: 0 (minimal output), 1 (-v), 2 (-vv) -verbose = 1 -quiet = false -whitelist-regex = [] -color = true -omit-covered-files = false - [tool.ruff.format] docstring-code-format = true @@ -164,20 +142,25 @@ extend-select = [ ignore-words = "./docs/source/.codespell/codespell-whitelist.txt" skip = "*.ipynb,*.csv,*.svg,pyproject.toml,docs/source/.codespell/codespell-whitelist.txt" -# Numpydoc validation, intentionally narrow. -# Used by the ``numpydoc-validation`` pre-commit hook to enforce that every -# public ``plot()`` method on a :class:`BaseExperiment` subclass keeps its -# numpydoc-style ``Parameters`` block in sync with the function signature -# (issue #886). 
The exclude regex is a negative lookahead that skips every -# node whose dotted name does *not* end in ``.plot``. The base class -# deliberately offers no public ``plot()`` (the shared dispatcher lives in -# the protected helper ``_render_plot``), so no carve-out is required. +# Numpydoc validation. Originally introduced under #886 as a narrow check on +# public ``.plot`` overrides, expanded under #898 to enforce a curated set of +# numpydoc rules across the whole ``causalpy`` package. The ``exclude`` regex +# matches any dotted node whose path contains a private (underscore-prefixed) +# component — private helpers, dunders, and members of private modules are +# considered implementation detail and not subject to docstring validation. [tool.numpydoc_validation] checks = [ "PR01", # Parameters not documented "PR02", # Unknown parameters + "PR04", # Parameter has no type + "PR07", # Parameter has no description + "PR10", # Parameter requires a space before the colon + "RT03", # Return value has no description + "SS03", # Summary does not end with a period + "GL07", # Sections are in the wrong order + "GL08", # Object does not have a docstring ] -exclude = ['^(?!.*\.plot$).*$'] +exclude = ['\._'] [tool.coverage.run] source = ["causalpy"]