""" pyplots.ai
forest-basic: Meta-Analysis Forest Plot
Library: letsplot 4.8.2 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-27
"""
| 6 | + |
import pandas as pd
from lets_plot import *  # supplies ggplot, aes, geom_*, scale_*, theme*, ggsize, ggsave, LetsPlot


# Initialise lets-plot's HTML frontend once, before any plot is built or saved.
LetsPlot.setup_html()
| 12 | + |
# Data: meta-analysis of clinical trials comparing treatment vs control.
# Effect sizes are log odds ratios (log OR), so the null effect sits at 0.
# "weight" is a relative study weight; it is normalised by the total below,
# so the values do not have to sum to 100.
studies = [
    {"study": "Smith 2018", "effect_size": 0.35, "ci_lower": 0.05, "ci_upper": 0.65, "weight": 12.5},
    {"study": "Johnson 2019", "effect_size": -0.12, "ci_lower": -0.45, "ci_upper": 0.21, "weight": 10.2},
    {"study": "Williams 2019", "effect_size": 0.48, "ci_lower": 0.18, "ci_upper": 0.78, "weight": 11.8},
    {"study": "Brown 2020", "effect_size": 0.22, "ci_lower": -0.15, "ci_upper": 0.59, "weight": 9.5},
    {"study": "Davis 2020", "effect_size": 0.55, "ci_lower": 0.20, "ci_upper": 0.90, "weight": 8.7},
    {"study": "Miller 2021", "effect_size": 0.15, "ci_lower": -0.18, "ci_upper": 0.48, "weight": 11.0},
    {"study": "Wilson 2021", "effect_size": 0.42, "ci_lower": 0.12, "ci_upper": 0.72, "weight": 12.0},
    {"study": "Moore 2022", "effect_size": 0.28, "ci_lower": -0.08, "ci_upper": 0.64, "weight": 9.8},
    {"study": "Taylor 2022", "effect_size": 0.65, "ci_lower": 0.28, "ci_upper": 1.02, "weight": 7.5},
    {"study": "Anderson 2023", "effect_size": 0.18, "ci_lower": -0.12, "ci_upper": 0.48, "weight": 12.8},
]

df = pd.DataFrame(studies)

# Pooled estimate: weight-averaged effect size across all studies.
total_weight = df["weight"].sum()
pooled_effect = (df["effect_size"] * df["weight"]).sum() / total_weight

# NOTE: the pooled standard error is a fixed illustrative value; it is NOT
# derived from the study-level intervals. Replace it before using this script
# for real inference.
pooled_se = 0.08
Z_95 = 1.96  # two-sided 95% critical value of the standard normal
ci_half_width = Z_95 * pooled_se
pooled_ci_lower = pooled_effect - ci_half_width
pooled_ci_upper = pooled_effect + ci_half_width

# Sort ascending by effect size, then number the rows from len(df) down to 1,
# so the smallest effect gets the highest y position (top of the plot).
df = df.sort_values("effect_size", ascending=True).reset_index(drop=True)
df["y_pos"] = range(len(df), 0, -1)

# Marker size grows linearly with weight (affine, not strictly proportional):
# the heaviest study maps to 10 and the +2 offset keeps light studies visible.
df["marker_size"] = df["weight"] / df["weight"].max() * 8 + 2
| 43 | + |
# Four-vertex diamond for the pooled estimate, drawn below the study rows
# (studies occupy y = 1..len(df)): the left/right tips mark the CI bounds and
# the top/bottom tips sit on the pooled point estimate.
pooled_diamond = pd.DataFrame(
    {
        "x": [pooled_ci_lower, pooled_effect, pooled_ci_upper, pooled_effect],
        "y": [-0.5, -1.0, -0.5, 0.0],
    }
)

# Build the forest plot. Geoms are added in drawing order (back to front).
plot = (
    ggplot()
    # Dashed vertical reference line at the null effect (log OR = 0)
    + geom_vline(xintercept=0, color="#888888", size=1, linetype="dashed")
    # Confidence intervals as horizontal whiskers, one per study row
    + geom_segment(
        aes(x="ci_lower", xend="ci_upper", y="y_pos", yend="y_pos"),
        data=df,
        color="#306998",
        size=1.5,
    )
    # Point estimates as squares, sized by the precomputed marker_size column
    + geom_point(
        aes(x="effect_size", y="y_pos", size="marker_size"),
        data=df,
        color="#306998",
        shape=15,  # square marker
    )
    # Pooled-estimate diamond
    + geom_polygon(aes(x="x", y="y"), data=pooled_diamond, fill="#FFD43B", color="#306998", size=1)
    # Use marker_size values as given instead of letting the size scale remap them
    + scale_size_identity()
    # Replace the numeric y ticks with the study names
    + scale_y_continuous(breaks=df["y_pos"].tolist(), labels=df["study"].tolist())
    # Labels and title
    + labs(x="Log Odds Ratio (95% CI)", y="", title="forest-basic · letsplot · pyplots.ai")
    # Base theme first, then overrides (order matters: overrides must come last)
    + theme_minimal()
    + theme(
        plot_title=element_text(size=24, face="bold"),
        axis_title_x=element_text(size=20),
        axis_text_x=element_text(size=16),
        axis_text_y=element_text(size=16),
        legend_position="none",
        panel_grid_major_y=element_blank(),
        panel_grid_minor=element_blank(),
    )
    + ggsize(1600, 900)
)
| 86 | + |
# Text annotation for the pooled estimate: horizontally centred on the pooled
# effect and placed at y = -1.8, below the study rows.
pooled_text = f"Pooled: {pooled_effect:.2f} [{pooled_ci_lower:.2f}, {pooled_ci_upper:.2f}]"
pooled_label_df = pd.DataFrame({"x": [pooled_effect], "y": [-1.8], "label": [pooled_text]})
plot = plot + geom_text(
    aes(x="x", y="y", label="label"),
    data=pooled_label_df,
    size=14,
    color="#306998",
)
| 96 | + |
# Static PNG export: the 1600 x 900 canvas rendered at scale=3 gives 4800 x 2700 px.
ggsave(plot, "plot.png", scale=3, path=".")

# HTML export of the same plot, kept for interactivity.
ggsave(plot, "plot.html", path=".")