|
""" pyplots.ai
forest-basic: Meta-Analysis Forest Plot
Library: plotnine 0.15.2 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-27
"""
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +from plotnine import ( |
| 9 | + aes, |
| 10 | + element_blank, |
| 11 | + element_line, |
| 12 | + element_rect, |
| 13 | + element_text, |
| 14 | + geom_errorbarh, |
| 15 | + geom_point, |
| 16 | + geom_polygon, |
| 17 | + geom_text, |
| 18 | + geom_vline, |
| 19 | + ggplot, |
| 20 | + labs, |
| 21 | + scale_size_identity, |
| 22 | + scale_x_continuous, |
| 23 | + scale_y_continuous, |
| 24 | + theme, |
| 25 | +) |
| 26 | + |
| 27 | + |
def format_estimate(effect: float, lower: float, upper: float) -> str:
    """Format a point estimate and its interval as ``effect [lower, upper]``.

    Every number is rendered with exactly two decimals, so a value such as
    -0.5 appears as "-0.50". Study rows and the pooled row share this one
    formatter, which keeps the right-hand text column uniform.

    Args:
        effect: Point estimate.
        lower: Lower bound of the interval.
        upper: Upper bound of the interval.

    Returns:
        The label text, e.g. ``"-0.45 [-0.72, -0.18]"``.
    """
    return f"{effect:.2f} [{lower:.2f}, {upper:.2f}]"


# Multiplier applied to the pooled standard error to get the interval bounds.
Z_95 = 1.96
# Factor turning a study weight into a point size.
MARKER_SCALE = 0.5
# Vertical half-extent of the pooled-estimate diamond, in y-axis units.
DIAMOND_HALF_HEIGHT = 0.3

# Data: Meta-analysis of RCTs comparing treatment vs control
studies = pd.DataFrame(
    {
        "study": [
            "Smith 2018",
            "Johnson 2019",
            "Williams 2019",
            "Brown 2020",
            "Davis 2020",
            "Miller 2021",
            "Wilson 2021",
            "Moore 2022",
            "Taylor 2022",
            "Anderson 2023",
        ],
        "effect_size": [-0.45, -0.22, -0.38, -0.15, -0.52, -0.31, -0.08, -0.41, -0.25, -0.35],
        "ci_lower": [-0.72, -0.48, -0.61, -0.42, -0.81, -0.55, -0.35, -0.68, -0.51, -0.58],
        "ci_upper": [-0.18, 0.04, -0.15, 0.12, -0.23, -0.07, 0.19, -0.14, 0.01, -0.12],
        "weight": [9.8, 11.2, 10.5, 8.7, 7.3, 10.9, 9.1, 8.4, 11.8, 12.3],
    }
)

# Pooled estimate: weight-averaged effect size across all studies.
pooled_effect = (studies["effect_size"] * studies["weight"]).sum() / studies["weight"].sum()
# NOTE: the pooled standard error is a fixed, simplified placeholder; it is
# not derived from the per-study intervals above.
pooled_se = 0.08
pooled_lower = pooled_effect - Z_95 * pooled_se
pooled_upper = pooled_effect + Z_95 * pooled_se

# Vertical positions: first study at the top (highest y), last study at y=1,
# leaving y=0 free for the pooled row.
studies["y_pos"] = range(len(studies), 0, -1)

# Point size is proportional to the study weight.
studies["marker_size"] = studies["weight"] * MARKER_SCALE

# Diamond for the pooled estimate: its left/right tips sit on the interval
# bounds and its top/bottom tips sit on the point estimate.
diamond_y = 0
diamond = pd.DataFrame(
    {
        "x": [pooled_lower, pooled_effect, pooled_upper, pooled_effect],
        "y": [diamond_y, diamond_y + DIAMOND_HALF_HEIGHT, diamond_y, diamond_y - DIAMOND_HALF_HEIGHT],
    }
)

# Per-study "effect [lower, upper]" text, formatted exactly like the pooled
# label so trailing zeros are never dropped.
studies["label"] = [
    format_estimate(effect, lower, upper)
    for effect, lower, upper in zip(studies["effect_size"], studies["ci_lower"], studies["ci_upper"])
]

# Fixed x positions for the two text columns
x_left = -1.4  # Left column for study names
x_right = 0.55  # Right column for effect sizes

# Repeat the fixed positions on every row so they can be mapped as aesthetics
studies["x_left"] = x_left
studies["x_right"] = x_right

# One-row frames holding the text shown on the pooled-estimate row
pooled_label_left = pd.DataFrame({"x": [x_left], "y": [diamond_y], "label": ["Pooled"]})
pooled_label_right = pd.DataFrame(
    {"x": [x_right], "y": [diamond_y], "label": [format_estimate(pooled_effect, pooled_lower, pooled_upper)]}
)
| 94 | + |
# Plot
# Shared styling, named once so the layers below stay consistent.
study_color = "#306998"
text_style = {"ha": "left", "color": "#333333"}
row_text = aes(x="x", y="y", label="label")

# Dashed vertical reference line at the null effect (0 for mean difference)
null_line = geom_vline(xintercept=0, linetype="dashed", color="#888888", size=1)

# Horizontal error bars spanning each study's confidence interval
ci_bars = geom_errorbarh(
    aes(y="y_pos", xmin="ci_lower", xmax="ci_upper"),
    data=studies,
    height=0.25,
    size=1.2,
    color=study_color,
)

# Point estimates; scale_size_identity makes marker_size the literal size
estimates = geom_point(
    aes(x="effect_size", y="y_pos", size="marker_size"),
    data=studies,
    color=study_color,
    fill=study_color,
)

# Pooled-estimate diamond
pooled_diamond = geom_polygon(aes(x="x", y="y"), data=diamond, fill="#FFD43B", color=study_color, size=1.2)

# Text columns: study names on the left, "effect [lower, upper]" on the right
study_names = geom_text(aes(x="x_left", y="y_pos", label="study"), data=studies, size=12, **text_style)
study_values = geom_text(aes(x="x_right", y="y_pos", label="label"), data=studies, size=10, **text_style)

# The pooled row repeats both columns in bold
pooled_name = geom_text(row_text, data=pooled_label_left, size=12, fontweight="bold", **text_style)
pooled_values = geom_text(row_text, data=pooled_label_right, size=10, fontweight="bold", **text_style)

# Axis labels, title and scales. The x limits extend past the outermost
# breaks so the text columns at x_left and x_right fall inside the panel.
annotations = labs(x="Mean Difference (Treatment - Control)", y="", title="forest-basic · plotnine · pyplots.ai")
x_scale = scale_x_continuous(breaks=[-0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4], limits=(-1.5, 1.3))
y_scale = scale_y_continuous(breaks=[], limits=(-1, 11.5))

# White canvas with faint vertical grid lines only; the y axis is hidden
# because the study-name text column takes its place.
forest_theme = theme(
    figure_size=(16, 9),
    panel_background=element_rect(fill="white"),
    plot_background=element_rect(fill="white"),
    panel_grid_major_x=element_line(color="#EEEEEE", size=0.5),
    panel_grid_major_y=element_blank(),
    panel_grid_minor=element_blank(),
    axis_text_x=element_text(size=16, color="#333333"),
    axis_text_y=element_blank(),
    axis_title_x=element_text(size=20, color="#333333"),
    axis_title_y=element_blank(),
    plot_title=element_text(size=24, ha="center", color="#333333"),
    axis_ticks_major_y=element_blank(),
    legend_position="none",
)

# Layers are added in drawing order: reference line first, text last.
plot = (
    ggplot()
    + null_line
    + ci_bars
    + estimates
    + scale_size_identity()
    + pooled_diamond
    + study_names
    + study_values
    + pooled_name
    + pooled_values
    + annotations
    + x_scale
    + y_scale
    + forest_theme
)

# Save
plot.save("plot.png", dpi=300, verbose=False)
0 commit comments