Skip to content

Commit 78a6b7a

Browse files
authored
Merge pull request #38 from DoubleML/dev
Add PLPR simulations to main
2 parents 066a447 + cac2102 commit 78a6b7a

21 files changed

Lines changed: 842 additions & 36 deletions

doc/_quarto-dev.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ website:
2020
- plm/plr.qmd
2121
- plm/plr_gate.qmd
2222
- plm/plr_cate.qmd
23-
- plm/pliv.qmd
2423
- plm/lplr.qmd
24+
- plm/plpr.qmd
25+
- plm/pliv.qmd
2526
# DID
2627
- did/did_pa.qmd
2728
- did/did_cs.qmd

doc/_website.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ website:
2525
- plm/plr_gate.qmd
2626
- plm/plr_cate.qmd
2727
- plm/lplr.qmd
28+
- plm/plpr.qmd
2829
- plm/pliv.qmd
2930
- text: "DID"
3031
menu:

doc/plm/plpr.qmd

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
---
2+
title: "PLPR Models"
3+
4+
jupyter: python3
5+
---
6+
7+
8+
```{python}
9+
#| echo: false
10+
11+
import numpy as np
12+
import pandas as pd
13+
from itables import init_notebook_mode
14+
import os
15+
import sys
16+
17+
doc_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
18+
if doc_dir not in sys.path:
19+
sys.path.append(doc_dir)
20+
21+
from utils.style_tables import generate_and_show_styled_table
22+
23+
init_notebook_mode(all_interactive=True)
24+
```
25+
26+
## Coverage
27+
28+
The simulations are based on the [make_plpr_CP2025](https://docs.doubleml.org/stable/api/datasets.html#dataset-generators)-DGP with $1000$ units and $10$ time periods. The following DGPs are considered:
29+
30+
- DGP 1: Linear in the nuisance parameters
31+
- DGP 2: Non-linear and smooth in the nuisance parameters
32+
- DGP 3: Non-linear and discontinuous in the nuisance parameters
33+
34+
35+
::: {.callout-note title="Metadata" collapse="true"}
36+
37+
```{python}
38+
#| echo: false
39+
metadata_file = '../../results/plm/plpr_ate_metadata.csv'
40+
metadata_df = pd.read_csv(metadata_file)
41+
print(metadata_df.T.to_string(header=False))
42+
```
43+
44+
:::
45+
46+
```{python}
47+
#| echo: false
48+
49+
# set up data and rename columns
50+
df_coverage = pd.read_csv("../../results/plm/plpr_ate_coverage.csv", index_col=None)
51+
52+
if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1:
53+
n_rep_coverage = df_coverage["repetition"].unique()[0]
54+
elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1:
55+
n_rep_coverage = df_coverage["n_rep"].unique()[0]
56+
else:
57+
n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined
58+
59+
display_columns_coverage = ["Learner g", "Learner m", "DGP", "Approach", "Bias", "CI Length", "Coverage", "Loss g", "Loss m"]
60+
```
61+
62+
### Partialling out
63+
64+
```{python}
65+
# | echo: false
66+
67+
generate_and_show_styled_table(
68+
main_df=df_coverage,
69+
filters={"level": 0.95, "Score": "partialling out"},
70+
display_cols=display_columns_coverage,
71+
n_rep=n_rep_coverage,
72+
level_col="level",
73+
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
74+
coverage_highlight_cols=["Coverage"]
75+
)
76+
```
77+
78+
```{python}
79+
#| echo: false
80+
81+
generate_and_show_styled_table(
82+
main_df=df_coverage,
83+
filters={"level": 0.9, "Score": "partialling out"},
84+
display_cols=display_columns_coverage,
85+
n_rep=n_rep_coverage,
86+
level_col="level",
87+
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
88+
coverage_highlight_cols=["Coverage"]
89+
)
90+
```
91+
92+
### IV-type
93+
94+
For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same type of learner (here **Learner g**).
95+
96+
```{python}
97+
#| echo: false
98+
99+
generate_and_show_styled_table(
100+
main_df=df_coverage,
101+
filters={"level": 0.95, "Score": "IV-type"},
102+
display_cols=display_columns_coverage,
103+
n_rep=n_rep_coverage,
104+
level_col="level",
105+
coverage_highlight_cols=["Coverage"]
106+
)
107+
```
108+
109+
```{python}
110+
#| echo: false
111+
112+
generate_and_show_styled_table(
113+
main_df=df_coverage,
114+
filters={"level": 0.9, "Score": "IV-type"},
115+
display_cols=display_columns_coverage,
116+
n_rep=n_rep_coverage,
117+
level_col="level",
118+
coverage_highlight_cols=["Coverage"]
119+
)
120+
```
121+
122+
123+
## Tuning
124+
125+
The simulations are based on the [make_plpr_CP2025](https://docs.doubleml.org/stable/api/datasets.html#dataset-generators)-DGP with $1000$ units and $10$ time periods. The following DGPs are considered:
126+
127+
- DGP 1: Linear in the nuisance parameters
128+
- DGP 3: Non-linear and discontinuous in the nuisance parameters
129+
130+
This is only an example as the untuned version just relies on the default configuration.
131+
132+
::: {.callout-note title="Metadata" collapse="true"}
133+
134+
```{python}
135+
#| echo: false
136+
metadata_file = '../../results/plm/plpr_ate_tune_metadata.csv'
137+
metadata_df = pd.read_csv(metadata_file)
138+
print(metadata_df.T.to_string(header=False))
139+
```
140+
141+
:::
142+
143+
```{python}
144+
#| echo: false
145+
146+
# set up data
147+
df_tune_cov = pd.read_csv("../../results/plm/plpr_ate_tune_coverage.csv", index_col=None)
148+
149+
assert df_tune_cov["repetition"].nunique() == 1
150+
n_rep_tune_cov = df_tune_cov["repetition"].unique()[0]
151+
152+
display_columns_tune_cov = ["Learner g", "Learner m", "Tuned", "DGP", "Approach", "Bias", "CI Length", "Coverage", "Loss g", "Loss m"]
153+
```
154+
155+
156+
### Partialling out
157+
158+
```{python}
159+
# | echo: false
160+
161+
generate_and_show_styled_table(
162+
main_df=df_tune_cov,
163+
filters={"level": 0.95, "Score": "partialling out"},
164+
display_cols=display_columns_tune_cov,
165+
n_rep=n_rep_tune_cov,
166+
level_col="level",
167+
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
168+
coverage_highlight_cols=["Coverage"]
169+
)
170+
```
171+
172+
```{python}
173+
#| echo: false
174+
175+
generate_and_show_styled_table(
176+
main_df=df_tune_cov,
177+
filters={"level": 0.9, "Score": "partialling out"},
178+
display_cols=display_columns_tune_cov,
179+
n_rep=n_rep_tune_cov,
180+
level_col="level",
181+
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
182+
coverage_highlight_cols=["Coverage"]
183+
)
184+
```

doc/plm/plr.qmd

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1:
5050
else:
5151
n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined
5252
53-
display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"]
53+
display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Loss g", "Loss m"]
5454
```
5555

5656
### Partialling out
@@ -64,7 +64,7 @@ generate_and_show_styled_table(
6464
display_cols=display_columns_coverage,
6565
n_rep=n_rep_coverage,
6666
level_col="level",
67-
rename_map={"Learner g": "Learner l"},
67+
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
6868
coverage_highlight_cols=["Coverage"]
6969
)
7070
```
@@ -78,7 +78,7 @@ generate_and_show_styled_table(
7878
display_cols=display_columns_coverage,
7979
n_rep=n_rep_coverage,
8080
level_col="level",
81-
rename_map={"Learner g": "Learner l"},
81+
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
8282
coverage_highlight_cols=["Coverage"]
8383
)
8484
```

monte-cover/src/montecover/plm/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from montecover.plm.lplr_ate import LPLRATECoverageSimulation
44
from montecover.plm.lplr_ate_tune import LPLRATETuningCoverageSimulation
55
from montecover.plm.pliv_late import PLIVLATECoverageSimulation
6+
from montecover.plm.plpr_ate import PLPRATECoverageSimulation
7+
from montecover.plm.plpr_ate_tune import PLPRATETuningCoverageSimulation
68
from montecover.plm.plr_ate import PLRATECoverageSimulation
79
from montecover.plm.plr_ate_sensitivity import PLRATESensitivityCoverageSimulation
810
from montecover.plm.plr_ate_tune import PLRATETuningCoverageSimulation
@@ -16,6 +18,9 @@
1618
"PLRCATECoverageSimulation",
1719
"PLRATESensitivityCoverageSimulation",
1820
"PLRATETuningCoverageSimulation",
21+
"PLPRATECoverageSimulation",
22+
"PLPRATETuningCoverageSimulation",
1923
"LPLRATECoverageSimulation",
2024
"LPLRATETuningCoverageSimulation",
25+
"PLPRATECoverageSimulation",
2126
]
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
from typing import Any, Dict, Optional
2+
3+
import doubleml as dml
4+
from doubleml.plm.datasets import make_plpr_CP2025
5+
6+
from montecover.base import BaseSimulation
7+
from montecover.utils import create_learner_from_config
8+
9+
10+
class PLPRATECoverageSimulation(BaseSimulation):
    """Coverage simulation for ATE estimation with ``DoubleMLPLPR``.

    Each repetition fits a ``DoubleMLPLPR`` model on panel data generated by
    ``make_plpr_CP2025`` and records coverage results for every configured
    confidence level, together with the mean nuisance losses.
    """

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
    ):
        """Forward all arguments to ``BaseSimulation`` and set oracle values.

        Parameters
        ----------
        config_file : str
            Passed through to ``BaseSimulation``.
        suppress_warnings : bool
            Passed through to ``BaseSimulation``.
        log_level : str
            Passed through to ``BaseSimulation``.
        log_file : Optional[str]
            Passed through to ``BaseSimulation``.
        """
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Runs after the base initializer; presumably that is where
        # ``self.dgp_parameters`` and ``self.logger`` are populated
        # (NOTE(review): confirm against BaseSimulation).
        self._calculate_oracle_values()

    def _process_config_parameters(self):
        """Validate that the config provides the learners required here.

        Raises
        ------
        AssertionError
            If ``self.dml_parameters`` has no ``"learners"`` entry, or if any
            learner entry lacks ``"ml_g"`` or ``"ml_m"``.
        """
        assert (
            "learners" in self.dml_parameters
        ), "No learners specified in the config file"

        # Every learner configuration must name both nuisance learners.
        for learner_cfg in self.dml_parameters["learners"]:
            for ml in ("ml_g", "ml_m"):
                assert ml in learner_cfg, f"No {ml} specified in the config file"

    def _calculate_oracle_values(self):
        """Store the true ``theta`` from the DGP parameters as oracle value."""
        self.logger.info("Calculating oracle values")

        self.oracle_values = {"theta": self.dgp_parameters["theta"]}

    def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
        """Fit one ``DoubleMLPLPR`` model and collect per-level coverage results.

        Parameters
        ----------
        dml_data
            Data object handed to ``DoubleMLPLPR`` as ``obj_dml_data``.
        dml_params
            Mapping with the keys ``"learners"`` (containing ``"ml_g"`` and
            ``"ml_m"`` configs), ``"score"`` and ``"approach"``.

        Returns
        -------
        Dict[str, Any]
            ``{"coverage": [...]}`` with one entry per confidence level. Each
            entry is the output of ``self._compute_coverage`` extended with
            learner names, score, approach, level and mean nuisance losses.
        """
        learner_config = dml_params["learners"]
        learner_g_name, ml_g = create_learner_from_config(learner_config["ml_g"])
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        score = dml_params["score"]
        approach = dml_params["approach"]

        # ``ml_g`` always serves as ``ml_l``; it is additionally passed as
        # ``ml_g`` only for the IV-type score.
        uses_iv_score = score == "IV-type"
        dml_model = dml.DoubleMLPLPR(
            obj_dml_data=dml_data,
            ml_l=ml_g,
            ml_m=ml_m,
            ml_g=ml_g if uses_iv_score else None,
            score=score,
            approach=approach,
        )
        dml_model.fit()
        nuisance_loss = dml_model.nuisance_loss

        # "Loss g" reports the ``ml_l`` loss for partialling out and the
        # ``ml_g`` loss for any other score.
        loss_g_key = "ml_l" if score == "partialling out" else "ml_g"

        result = {"coverage": []}
        for level in self.confidence_parameters["level"]:
            level_result = {
                "coverage": self._compute_coverage(
                    thetas=dml_model.coef,
                    oracle_thetas=self.oracle_values["theta"],
                    confint=dml_model.confint(level=level),
                    joint_confint=None,
                ),
            }

            # Attach the run configuration and losses, then collect.
            for key, res in level_result.items():
                res.update(
                    {
                        "Learner g": learner_g_name,
                        "Learner m": learner_m_name,
                        "Score": score,
                        "Approach": approach,
                        "level": level,
                        "Loss g": nuisance_loss[loss_g_key].mean(),
                        "Loss m": nuisance_loss["ml_m"].mean(),
                    }
                )
                result[key].append(res)

        return result

    def summarize_results(self):
        """Aggregate every result frame in ``self.results`` per parameter combination.

        Returns
        -------
        dict
            Maps each result name to a frame grouped by learner, score,
            approach, DGP and level, holding the mean of coverage, CI length,
            bias and both losses plus the count of repetitions.
        """
        self.logger.info("Summarizing simulation results")

        groupby_cols = ["Learner g", "Learner m", "Score", "Approach", "DGP", "level"]
        mean_columns = ["Coverage", "CI Length", "Bias", "Loss g", "Loss m"]
        aggregation_dict = {column: "mean" for column in mean_columns}
        aggregation_dict["repetition"] = "count"

        # There may be several result frames; summarize each one separately.
        result_summary = {}
        for result_name, result_df in self.results.items():
            grouped = result_df.groupby(groupby_cols)
            result_summary[result_name] = grouped.agg(aggregation_dict).reset_index()
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData:
        """Generate one panel data set and wrap it for DoubleML.

        Parameters
        ----------
        dgp_params
            Mapping with the keys ``"num_id"``, ``"num_t"``, ``"dim_x"``,
            ``"theta"`` and ``"DGP"``.

        Returns
        -------
        The ``dml.DoubleMLPanelData`` object built from the generated data
        with ``static_panel=True``.
        """
        panel_data = make_plpr_CP2025(
            num_id=dgp_params["num_id"],
            num_t=dgp_params["num_t"],
            dim_x=dgp_params["dim_x"],
            theta=dgp_params["theta"],
            dgp_type=dgp_params["DGP"],
        )
        return dml.DoubleMLPanelData(
            panel_data,
            y_col="y",
            d_cols="d",
            t_col="time",
            id_col="id",
            static_panel=True,
        )

0 commit comments

Comments
 (0)