Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions plots/ice-basic/implementations/python/altair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
""" anyplot.ai
ice-basic: Individual Conditional Expectation (ICE) Plot
Library: altair 6.1.0 | Python 3.13.13
Quality: 89/100 | Created: 2026-05-07
"""

import os
import sys


# Prevent this file (altair.py) from shadowing the installed altair package:
# drop the script's own directory from the import search path before the
# third-party imports below are resolved.
_script_dir = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
# Filter in place instead of calling list.remove(): remove() only drops the
# first match, so a duplicate entry (for example the same directory also being
# listed in PYTHONPATH) would still let `import altair` find this script.
sys.path[:] = [entry for entry in sys.path if entry != _script_dir]

import altair as alt # noqa: E402
import numpy as np # noqa: E402
import pandas as pd # noqa: E402
from sklearn.ensemble import GradientBoostingRegressor # noqa: E402


# Theme tokens: chrome colours are chosen by ANYPLOT_THEME ("light" when the
# variable is unset); any value other than "light" selects the dark set.
THEME = os.getenv("ANYPLOT_THEME", "light")

# Order within each tuple: page background, elevated background, ink, soft ink.
_LIGHT_TOKENS = ("#FAF8F1", "#FFFDF6", "#1A1A17", "#4A4A44")
_DARK_TOKENS = ("#1A1A17", "#242420", "#F0EFE8", "#B8B7B0")
PAGE_BG, ELEVATED_BG, INK, INK_SOFT = _LIGHT_TOKENS if THEME == "light" else _DARK_TOKENS

# Data colours are the same in both themes.
BRAND = "#009E73"
PDP_COLOR = "#D55E00"

# Data: synthetic housing observations drawn from a fixed seed.
# The draw order (sqft, bedrooms, house_age, noise) determines the generated
# values, so it must not be rearranged.
np.random.seed(42)
n_obs = 100
sqft = np.random.uniform(800, 3500, n_obs)
bedrooms = np.random.randint(1, 6, n_obs)
house_age = np.random.uniform(0, 50, n_obs)

# Price is linear in bedrooms and age, linear plus mildly quadratic in square
# footage, with Gaussian noise (standard deviation 25000) on top.
size_term = 120 * sqft
bedroom_term = 25000 * bedrooms
age_term = 600 * house_age
curvature_term = 0.008 * sqft**2
noise = np.random.normal(0, 25000, n_obs)
price = size_term + bedroom_term - age_term + curvature_term + noise

# Feature matrix with one row per observation; columns are sqft, bedrooms, house_age.
X = np.stack((sqft, bedrooms, house_age), axis=1)
# fit() returns the fitted estimator, so construction and training can be chained.
model = GradientBoostingRegressor(n_estimators=200, max_depth=4, random_state=42).fit(X, price)

# Build ICE curves: sweep square footage across its observed range while
# holding each observation's bedrooms and house age at their recorded values.
grid_size = 60
sqft_grid = np.linspace(sqft.min(), sqft.max(), grid_size)

records = []
for obs_id, (n_bedrooms, age) in enumerate(zip(bedrooms, house_age, strict=False)):
    # One synthetic row per grid point: only the sqft column varies.
    X_ice = np.column_stack([sqft_grid, np.full(grid_size, n_bedrooms), np.full(grid_size, age)])
    curve = model.predict(X_ice)
    records.extend(
        {"obs_id": obs_id, "sqft": grid_sqft, "price_k": predicted / 1000, "series": "ICE Curves"}
        for grid_sqft, predicted in zip(sqft_grid, curve, strict=False)
    )

ice_df = pd.DataFrame(records)

# PDP: mean prediction over all observations at each sqft grid point
per_grid_point = ice_df.groupby("sqft", as_index=False)
pdp_df = per_grid_point["price_k"].mean()
pdp_df["series"] = "Partial Dependence"

# Colour scale and legend definition shared by the ICE and PDP line layers.
color_scale = alt.Scale(domain=["ICE Curves", "Partial Dependence"], range=[BRAND, PDP_COLOR])
color_legend = alt.Legend(title="", labelFontSize=16, symbolSize=250, symbolStrokeWidth=3, orient="top-right")
series_color = alt.Color("series:N", scale=color_scale, legend=color_legend)

# ICE individual curves: faint, thin lines so that overlapping curves read as density.
# The detail channel splits the data into one line per observation.
ice_lines = alt.Chart(ice_df).mark_line(opacity=0.10, strokeWidth=1.5)
ice_layer = ice_lines.encode(
    x=alt.X("sqft:Q", title="Square Footage (sq ft)"),
    y=alt.Y("price_k:Q", title="Predicted Price ($K)"),
    detail="obs_id:N",
    color=series_color,
)

# PDP overlay: a single thick, fully opaque line for the averaged curve.
pdp_line = alt.Chart(pdp_df).mark_line(strokeWidth=5)
pdp_layer = pdp_line.encode(x="sqft:Q", y="price_k:Q", color=series_color)

# PDP annotation: text label anchored at the PDP row with the 15th-largest
# sqft value (the smallest of the top 15), i.e. towards the right-hand part
# of the curve rather than its very end.
top_by_sqft = pdp_df.sort_values("sqft", ascending=False).head(15)
pdp_annotation_df = top_by_sqft.tail(1).copy()
# Right-aligned and nudged up/left so the text sits above the line at the anchor.
annotation_text = alt.Chart(pdp_annotation_df).mark_text(
    align="right", dx=-6, dy=-18, fontSize=16, color=PDP_COLOR, fontWeight="bold"
)
pdp_annotation = annotation_text.encode(x="sqft:Q", y="price_k:Q", text=alt.value("Partial Dependence"))

# Rug plot: one tick per actually observed sqft value, drawn along the x-axis.
rug_df = pd.DataFrame({"sqft": sqft})
rug_ticks = alt.Chart(rug_df).mark_tick(thickness=1.5, size=14, opacity=0.45, color=INK_SOFT)
# y is a fixed pixel value rather than a data field; 890 is presumably chosen
# to sit just inside the bottom of the 900 px tall view set on the final chart.
rug_layer = rug_ticks.encode(x="sqft:Q", y=alt.value(890))

# Compose the layers in drawing order: ICE curves, PDP line, PDP label, rug ticks.
layered = alt.layer(ice_layer, pdp_layer, pdp_annotation, rug_layer)
sized = layered.properties(
    width=1600,
    height=900,
    background=PAGE_BG,
    title=alt.Title("ice-basic · altair · anyplot.ai", fontSize=28),
)

# Theme-dependent chrome, applied through the top-level configure_* methods.
axis_style = {
    "domainColor": INK_SOFT,
    "tickColor": INK_SOFT,
    "gridColor": INK,
    "gridOpacity": 0.10,
    "labelColor": INK_SOFT,
    "titleColor": INK,
    "labelFontSize": 18,
    "titleFontSize": 22,
}
legend_style = {
    "fillColor": ELEVATED_BG,
    "strokeColor": INK_SOFT,
    "labelColor": INK_SOFT,
    "titleColor": INK,
    "labelFontSize": 16,
}

chart = (
    sized.configure_view(fill=PAGE_BG, strokeWidth=0)
    .configure_axis(**axis_style)
    .configure_title(color=INK, fontSize=28)
    .configure_legend(**legend_style)
)

# Write both outputs, named after the active theme: a PNG rendered at 3x
# scale and an HTML export of the same chart.
png_path = f"plot-{THEME}.png"
html_path = f"plot-{THEME}.html"
chart.save(png_path, scale_factor=3.0)
chart.save(html_path)
257 changes: 257 additions & 0 deletions plots/ice-basic/metadata/python/altair.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
library: altair
language: python
specification_id: ice-basic
created: '2026-05-07T16:53:20Z'
updated: '2026-05-07T17:11:05Z'
generated_by: claude-sonnet
workflow_run: 25509381729
issue: 5238
python_version: 3.13.13
library_version: 6.1.0
preview_url_light: https://storage.googleapis.com/anyplot-images/plots/ice-basic/python/altair/plot-light.png
preview_url_dark: https://storage.googleapis.com/anyplot-images/plots/ice-basic/python/altair/plot-dark.png
preview_html_light: https://storage.googleapis.com/anyplot-images/plots/ice-basic/python/altair/plot-light.html
preview_html_dark: https://storage.googleapis.com/anyplot-images/plots/ice-basic/python/altair/plot-dark.html
quality_score: 89
review:
strengths:
- 'Perfect spec compliance: ICE lines with low alpha, bold PDP overlay, and rug
plot all implemented correctly'
- 'Correct Okabe-Ito 2-color scheme (#009E73 for ICE, #D55E00 for PDP) with shared
legend scale across layers'
- Full theme-adaptive chrome using correct token values (PAGE_BG, ELEVATED_BG, INK,
INK_SOFT) for both light and dark
- All font sizes explicitly set to spec-correct values (title=28px, axis=22px, ticks=18px,
legend=16px)
- 'Idiomatic Altair: detail channel for per-observation lines, alt.layer() composition,
shared color scale across chart layers'
- Realistic house price GBR model scenario with sensible feature ranges and plausible
price values
- Clean KISS code structure with deterministic seed and all imports used
weaknesses:
- PDP annotation placed at ~2400 sqft (middle of chart) where ICE density is highest
— text partially overlaps dense line region; placement near right edge would reduce
visual clutter
- Design excellence is solid but not exceptional — no additional storytelling elements
like highlighting outlier observations or showing heterogeneous subgroups
- Library mastery could be stronger — could use Altair selection/brushing or parameter-based
interactivity to highlight individual ICE curves on hover
image_description: |-
Light render (plot-light.png):
Background: Warm off-white, consistent with #FAF8F1 — not pure white
Chrome: Title "ice-basic · altair · anyplot.ai" centered at top, clearly readable in dark ink. Axis labels "Square Footage (sq ft)" and "Predicted Price ($K)" in dark INK_SOFT. Tick labels visible and readable at 18px. Legend in top-right corner with "ICE Curves" and "Partial Dependence" entries — small but legible.
Data: ICE curves rendered in #009E73 (brand green) at 10% opacity, creating a semi-transparent density band from ~$40K to ~$620K. Bold orange-vermillion (#D55E00) PDP line clearly overlaid. Rug plot visible along x-axis bottom. "Partial Dependence" annotation in orange placed at ~2400 sqft mid-chart, slightly overlapping the dense ICE region but still readable.
Legibility verdict: PASS — all text readable against the light background. Subtle grid at 10% opacity does not interfere.

Dark render (plot-dark.png):
Background: Near-black warm surface, consistent with #1A1A17 — not pure black
Chrome: Title and axis labels rendered in light INK (#F0EFE8), clearly readable against dark background. Tick labels in INK_SOFT (#B8B7B0) — visible and readable. Legend box uses ELEVATED_BG (#242420) with soft stroke. No dark-on-dark failures observed.
Data: ICE curve color identical to light render (#009E73 green band) — data colors unchanged. PDP line in same #D55E00 orange. The contrast of the teal density band and orange PDP line is effectively maintained on the dark surface.
Legibility verdict: PASS — all text readable against the dark background. Brand green #009E73 remains clearly visible on the dark surface.
criteria_checklist:
visual_quality:
score: 29
max: 30
items:
- id: VQ-01
name: Text Legibility
score: 8
max: 8
passed: true
comment: 'All font sizes explicitly set: title=28px, axis=22px, ticks=18px,
legend=16px. Readable in both themes.'
- id: VQ-02
name: No Overlap
score: 5
max: 6
passed: true
comment: Annotation placed mid-chart at ~2400 sqft where ICE density is highest,
creating slight visual clutter. Main content readable.
- id: VQ-03
name: Element Visibility
score: 6
max: 6
passed: true
comment: 10% opacity appropriate for 100 ICE lines, bold PDP line (strokeWidth=5)
clearly visible. Rug ticks visible at bottom.
- id: VQ-04
name: Color Accessibility
score: 2
max: 2
passed: true
comment: Okabe-Ito CVD-safe colors. ICE vs PDP distinguished by color + stroke
weight + opacity — not hue alone.
- id: VQ-05
name: Layout & Canvas
score: 4
max: 4
passed: true
comment: Plot fills canvas well (1600x900). Legend in top-right does not isolate.
Good margins.
- id: VQ-06
name: Axis Labels & Title
score: 2
max: 2
passed: true
comment: 'X: ''Square Footage (sq ft)'', Y: ''Predicted Price ($K)'' — both
descriptive with units.'
- id: VQ-07
name: Palette Compliance
score: 2
max: 2
passed: true
comment: 'First series #009E73, second #D55E00 (Okabe-Ito order). Backgrounds
correct (#FAF8F1 / #1A1A17). Chrome correctly adapts to theme in both renders.'
design_excellence:
score: 13
max: 20
items:
- id: DE-01
name: Aesthetic Sophistication
score: 5
max: 8
passed: true
comment: Thoughtful Okabe-Ito 2-color scheme, intentional semi-transparency
for density visualization. Above well-configured defaults but not FiveThirtyEight
level.
- id: DE-02
name: Visual Refinement
score: 4
max: 6
passed: true
comment: Subtle grid (10% opacity), view border removed (strokeWidth=0), refined
INK_SOFT axis colors, legend with ELEVATED_BG fill. Good refinement.
- id: DE-03
name: Data Storytelling
score: 4
max: 6
passed: true
comment: Visual hierarchy through bold+opaque PDP vs. transparent ICE. Rug
plot adds distribution context. Annotation labels PDP. Good storytelling
but no emphasis on specific subgroup behavior.
spec_compliance:
score: 15
max: 15
items:
- id: SC-01
name: Plot Type
score: 5
max: 5
passed: true
comment: Correct ICE plot with individual lines per observation.
- id: SC-02
name: Required Features
score: 4
max: 4
passed: true
comment: Low-alpha individual lines, bold PDP overlay, rug plot — all spec
notes implemented.
- id: SC-03
name: Data Mapping
score: 3
max: 3
passed: true
comment: sqft on x, price_k on y, detail=obs_id:N creates individual ICE lines
correctly.
- id: SC-04
name: Title & Legend
score: 3
max: 3
passed: true
comment: Title 'ice-basic · altair · anyplot.ai' correct. Legend labels 'ICE
Curves' and 'Partial Dependence' are descriptive.
data_quality:
score: 15
max: 15
items:
- id: DQ-01
name: Feature Coverage
score: 6
max: 6
passed: true
comment: 100 observations, 60 grid points, GBR model showing non-linear ICE
curves with heterogeneous spread.
- id: DQ-02
name: Realistic Context
score: 5
max: 5
passed: true
comment: House price prediction with GradientBoostingRegressor — realistic,
neutral, comprehensible scenario.
- id: DQ-03
name: Appropriate Scale
score: 4
max: 4
passed: true
comment: 800-3500 sqft range and $40K-$620K predicted prices are plausible
for residential housing.
code_quality:
score: 10
max: 10
items:
- id: CQ-01
name: KISS Structure
score: 3
max: 3
passed: true
comment: 'Linear: imports → data → model → ICE computation → plot → save.
No functions or classes.'
- id: CQ-02
name: Reproducibility
score: 2
max: 2
passed: true
comment: np.random.seed(42) set.
- id: CQ-03
name: Clean Imports
score: 2
max: 2
passed: true
comment: All imports (os, sys, altair, numpy, pandas, sklearn) are used.
- id: CQ-04
name: Code Elegance
score: 2
max: 2
passed: true
comment: Clean Pythonic code, appropriate complexity for the visualization.
- id: CQ-05
name: Output & API
score: 1
max: 1
passed: true
comment: Saves plot-{THEME}.png and plot-{THEME}.html. Current Altair 6.1.0
API.
library_mastery:
score: 7
max: 10
items:
- id: LM-01
name: Idiomatic Usage
score: 4
max: 5
passed: true
comment: 'Good Altair usage: detail channel for grouped lines, alt.layer()
composition, shared color scale across layers, configure_* for global theme.'
- id: LM-02
name: Distinctive Features
score: 3
max: 5
passed: true
comment: detail:N channel for per-observation line grouping (Vega-Lite grammar
feature), mark_tick for rug plot, shared color scale with legend across
layered charts.
verdict: APPROVED
impl_tags:
dependencies:
- sklearn
techniques:
- layer-composition
- annotations
- html-export
patterns:
- data-generation
- iteration-over-groups
- groupby-aggregation
dataprep: []
styling:
- alpha-blending
Loading