Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions plots/altair/point/scatter-basic/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
scatter-basic: Basic Scatter Plot
Library: altair
"""

import altair as alt
import numpy as np
import pandas as pd


# Data: 100 reproducible points with a positive linear trend plus noise
np.random.seed(42)
n_points = 100
x = 10 + 2 * np.random.randn(n_points)
noise = 2 * np.random.randn(n_points)
y = 0.8 * x + noise

# Altair reads the columns by name from a DataFrame
data = pd.DataFrame({"x": x, "y": y})

# Create chart: name the pieces first, then assemble step by step
point_style = {"filled": True, "opacity": 0.7, "size": 100, "color": "#306998"}
x_encoding = alt.X("x:Q", title="X Value")
y_encoding = alt.Y("y:Q", title="Y Value")

chart = alt.Chart(data).mark_point(**point_style)
chart = chart.encode(x=x_encoding, y=y_encoding, tooltip=["x:Q", "y:Q"])
chart = chart.properties(width=1600, height=900, title="Basic Scatter Plot")
chart = chart.configure_axis(labelFontSize=16, titleFontSize=20)
chart = chart.configure_title(fontSize=20)

# Save as PNG (1600 * 3 = 4800px, 900 * 3 = 2700px)
chart.save("plot.png", scale_factor=3.0)
34 changes: 34 additions & 0 deletions plots/bokeh/scatter/scatter-basic/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
scatter-basic: Basic Scatter Plot
Library: bokeh
"""

import numpy as np
from bokeh.io import export_png
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure


# Data: seeded so every run draws the same 100 correlated points
np.random.seed(42)
sample_size = 100
x = 10 + np.random.randn(sample_size) * 2
y = 0.8 * x + 2 * np.random.randn(sample_size)

source = ColumnDataSource(data={"x": x, "y": y})

# Create figure (4800 × 2700 px for 16:9 aspect ratio)
p = figure(
    width=4800,
    height=2700,
    title="Basic Scatter Plot",
    x_axis_label="X Value",
    y_axis_label="Y Value",
)

# Plot scatter
p.scatter(x="x", y="y", source=source, size=12, color="#306998", alpha=0.7)

# Styling: both axes share the same label and tick font sizes
p.title.text_font_size = "20pt"
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font_size = "20pt"
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "16pt"
p.grid.grid_line_alpha = 0.3

# Save
export_png(p, filename="plot.png")
96 changes: 96 additions & 0 deletions plots/highcharts/scatter/scatter-basic/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
scatter-basic: Basic Scatter Plot
Library: highcharts
"""

import tempfile
import time
import urllib.request
from pathlib import Path

import numpy as np
from highcharts_core.chart import Chart
from highcharts_core.options import HighchartsOptions
from highcharts_core.options.series.scatter import ScatterSeries
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


# Data: the same seeded, correlated 100-point sample the other scatter-basic
# implementations generate, so the rendered plots are directly comparable.
np.random.seed(42)
x_values = np.random.randn(100) * 2 + 10
y_values = x_values * 0.8 + np.random.randn(100) * 2

# Keep x and y as plain Python lists of floats, the same shape the previous
# hard-coded data had, so the series-building code below is unaffected.
x = x_values.tolist()
y = y_values.tolist()
Comment on lines +19 to +20
Copy link

Copilot AI Dec 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-coded data is inconsistent with all other implementations. Other libraries generate 100 random points with correlation, but this uses only 8 manually specified points. This creates a significantly different visualization and doesn't properly demonstrate a scatter plot's utility for many data points. Use the same data generation pattern as other libraries: np.random.seed(42), x = np.random.randn(100) * 2 + 10, y = x * 0.8 + np.random.randn(100) * 2.

Copilot uses AI. Check for mistakes.

# Create chart with container: the id must match the <div id="container">
# element in the HTML page the chart is rendered into.
chart = Chart(container="container")
chart.options = HighchartsOptions()

# Chart configuration (4800 x 2700 px canvas on a white background)
chart.options.chart = {"type": "scatter", "width": 4800, "height": 2700, "backgroundColor": "#ffffff"}

# Title
chart.options.title = {"text": "Basic Scatter Plot", "style": {"fontSize": "60px"}}

# Axes: both use the same font sizes and a faint grid
chart.options.x_axis = {
    "title": {"text": "X Value", "style": {"fontSize": "48px"}},
    "labels": {"style": {"fontSize": "40px"}},
    "gridLineWidth": 1,
    "gridLineColor": "rgba(0, 0, 0, 0.1)",
}
chart.options.y_axis = {
    "title": {"text": "Y Value", "style": {"fontSize": "48px"}},
    "labels": {"style": {"fontSize": "40px"}},
    "gridLineWidth": 1,
    "gridLineColor": "rgba(0, 0, 0, 0.1)",
}

# Legend is disabled: there is only a single series to show
chart.options.legend = {"enabled": False}

# Add series
series = ScatterSeries()
# strict=True raises ValueError if x and y differ in length instead of
# silently dropping the unpaired trailing values.
series.data = list(zip(x, y, strict=True))
series.name = "Data"
series.marker = {"radius": 20, "fillColor": "#306998", "lineWidth": 2, "lineColor": "#306998"}
chart.add_series(series)

# Download Highcharts JS so it can be embedded inline in the HTML page.
# NOTE(review): the script is fetched at run time with no integrity check,
# so the result is only as trustworthy as the CDN response — confirm this
# trade-off is intentional.
highcharts_url = "https://code.highcharts.com/highcharts.js"
with urllib.request.urlopen(highcharts_url, timeout=30) as response:
    raw_js = response.read()
highcharts_js = raw_js.decode("utf-8")

Comment on lines +56 to +60
Copy link

Copilot AI Dec 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Downloading external JavaScript from an untrusted source without integrity verification poses a security risk. If code.highcharts.com is compromised, malicious code could be executed. Consider either: (1) bundling the Highcharts library with the project, (2) verifying the downloaded content's hash against a known good value, or (3) adding a comment acknowledging this trade-off if it's an intentional design decision.

Suggested change
# Download Highcharts JS for inline embedding
highcharts_url = "https://code.highcharts.com/highcharts.js"
with urllib.request.urlopen(highcharts_url, timeout=30) as response:
highcharts_js = response.read().decode("utf-8")
# Use bundled Highcharts JS for inline embedding to avoid runtime download and supply chain risk.
# See: https://github.com/highcharts/highcharts for official source.
highcharts_js_path = Path(__file__).parent.parent / "vendor" / "highcharts.js"
with open(highcharts_js_path, "r", encoding="utf-8") as f:
highcharts_js = f.read()

Copilot uses AI. Check for mistakes.
# Generate HTML with inline scripts: the Highcharts library first, then the
# chart definition that renders into the #container element.
html_str = chart.to_js_literal()
html_content = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script>{highcharts_js}</script>
</head>
<body style="margin:0;">
<div id="container" style="width: 4800px; height: 2700px;"></div>
<script>{html_str}</script>
</body>
</html>"""

# Write temp HTML and take screenshot. delete=False keeps the file on disk
# after the handle is closed so the browser can open it; it is removed
# explicitly in the outer finally below.
with tempfile.NamedTemporaryFile(mode="w", suffix=".html", delete=False, encoding="utf-8") as f:
    f.write(html_content)
    temp_path = f.name

try:
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    # Window is larger than the 4800 x 2700 container so it fits in view
    chrome_options.add_argument("--window-size=5000,3000")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # as_uri() builds a well-formed file:// URL on every platform
        driver.get(Path(temp_path).as_uri())
        # Fixed pause to give the chart time to render before capturing
        time.sleep(5)

        # Screenshot the chart container element for exact dimensions
        container = driver.find_element("id", "container")
        container.screenshot("plot.png")
    finally:
        # Always shut the browser down, even if loading or capture failed
        driver.quit()
finally:
    # Always remove the temp HTML file, even if the browser step failed
    Path(temp_path).unlink(missing_ok=True)
31 changes: 31 additions & 0 deletions plots/letsplot/point/scatter-basic/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
scatter-basic: Basic Scatter Plot
Library: letsplot
"""

import numpy as np
import pandas as pd
from lets_plot import LetsPlot, aes, element_text, geom_point, ggplot, ggsave, ggsize, labs, theme, theme_minimal


LetsPlot.setup_html()

# Data: fixed seed gives the same 100 noisy, linearly related points each run
np.random.seed(42)
count = 100
x = 2 * np.random.randn(count) + 10
scatter_noise = np.random.randn(count) * 2
y = 0.8 * x + scatter_noise

columns = {"x": x, "y": y}
data = pd.DataFrame(columns)

# Plot: name each component, then add them in the same order as before
points_layer = geom_point(color="#306998", size=4, alpha=0.7)
labels = labs(x="X Value", y="Y Value", title="Basic Scatter Plot")
text_sizes = theme(
    plot_title=element_text(size=20),
    axis_title=element_text(size=20),
    axis_text=element_text(size=16),
)

plot = ggplot(data, aes(x="x", y="y")) + points_layer + labels + ggsize(1600, 900) + theme_minimal() + text_sizes

# Save (scale 3x to get 4800 × 2700 px)
ggsave(plot, "plot.png", path=".", scale=3)
124 changes: 16 additions & 108 deletions plots/matplotlib/scatter/scatter-basic/default.py
Original file line number Diff line number Diff line change
@@ -1,119 +1,27 @@
"""
scatter-basic: Basic Scatter Plot
Implementation for: matplotlib
Variant: default
Python: 3.10+
Library: matplotlib
"""

from typing import TYPE_CHECKING, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


if TYPE_CHECKING:
from matplotlib.figure import Figure


def create_plot(
    data: pd.DataFrame,
    x: str,
    y: str,
    figsize: tuple[float, float] = (16, 9),
    alpha: float = 0.6,
    size: float = 30,
    color: str = "steelblue",
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    edgecolors: Optional[str] = None,
    linewidth: float = 0,
    **kwargs,
) -> "Figure":
    """
    Create a basic scatter plot of two numeric DataFrame columns.

    Args:
        data: Input DataFrame; must be non-empty and contain columns x and y
        x: Column name for x-axis values
        y: Column name for y-axis values
        figsize: Figure size as (width, height) tuple (default: (16, 9))
        alpha: Transparency level for points (default: 0.6)
        size: Point size, passed to scatter as ``s`` (default: 30)
        color: Point color, passed to scatter as ``c`` (default: "steelblue")
        title: Plot title; no title is set when None or empty (default: None)
        xlabel: X-axis label; falls back to the column name x when None or empty
        ylabel: Y-axis label; falls back to the column name y when None or empty
        edgecolors: Edge color for points (default: None)
        linewidth: Width of edge lines (default: 0)
        **kwargs: Additional parameters passed through to ``ax.scatter``

    Returns:
        Matplotlib Figure object

    Raises:
        ValueError: If data is empty
        KeyError: If x or y is not a column of data
        TypeError: If the x or y column contains non-numeric data

    Example:
        >>> data = pd.DataFrame({'x': [1, 2, 3], 'y': [2, 4, 3]})
        >>> fig = create_plot(data, 'x', 'y')
    """
    # Input validation
    if data.empty:
        raise ValueError("Data cannot be empty")

    # Check required columns
    for col in (x, y):
        if col not in data.columns:
            # Column labels need not be strings (e.g. integer labels), so
            # convert each one before joining; otherwise str.join raises a
            # TypeError that hides the intended KeyError.
            available = ", ".join(str(name) for name in data.columns)
            raise KeyError(f"Column '{col}' not found. Available columns: {available}")

    # Check that both columns are numeric (x first, then y)
    for col in (x, y):
        if not pd.api.types.is_numeric_dtype(data[col]):
            raise TypeError(f"Column '{col}' must contain numeric data")

    # Create figure
    fig, ax = plt.subplots(figsize=figsize)

    # Plot data
    ax.scatter(data[x], data[y], s=size, alpha=alpha, c=color, edgecolors=edgecolors, linewidth=linewidth, **kwargs)

    # Labels and title
    ax.set_xlabel(xlabel or x)
    ax.set_ylabel(ylabel or y)

    if title:
        ax.set_title(title)

    # Apply styling
    ax.grid(True, alpha=0.3)

    # Lay out this figure explicitly rather than pyplot's "current" figure
    fig.tight_layout()

    return fig


if __name__ == "__main__":
# Sample data for testing - many points to demonstrate basic scatter
np.random.seed(42)
n_points = 500
# Data: x is centred on 10, matching the sample the other scatter-basic
# implementations generate so the axis ranges are comparable across libraries.
np.random.seed(42)
x = np.random.randn(100) * 2 + 10
Copy link

Copilot AI Dec 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Data generation is inconsistent with other libraries. Most implementations use x = np.random.randn(100) * 2 + 10, but this one uses + 5 instead of + 10. This will result in different x-axis ranges and make visual comparisons across libraries inconsistent.

Suggested change
x = np.random.randn(100) * 2 + 5
x = np.random.randn(100) * 2 + 10

Copilot uses AI. Check for mistakes.
# Noise scale of 2 matches the other scatter-basic implementations
y = x * 0.8 + np.random.randn(100) * 2
Copy link

Copilot AI Dec 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Data generation is inconsistent with other libraries. The y-value calculation uses * 1.5 for the random component, while most other implementations use * 2. This creates different data distributions and visual appearances across libraries.

Suggested change
y = x * 0.8 + np.random.randn(100) * 1.5
y = x * 0.8 + np.random.randn(100) * 2

Copilot uses AI. Check for mistakes.

data = pd.DataFrame(
{
"x": np.random.randn(n_points) * 2 + 10,
"y": np.random.randn(n_points) * 3 + 15 + np.random.randn(n_points) * 0.5,
}
)
# Create plot
fig, ax = plt.subplots(figsize=(16, 9))
ax.scatter(x, y, alpha=0.7, s=80, color="#306998")

# Create plot
fig = create_plot(data, "x", "y", title="Basic Scatter Plot Example", xlabel="X Value", ylabel="Y Value")
# Labels and styling
ax.set_xlabel("X Value", fontsize=20)
ax.set_ylabel("Y Value", fontsize=20)
ax.set_title("Basic Scatter Plot", fontsize=20)
ax.tick_params(axis="both", labelsize=16)
ax.grid(True, alpha=0.3)

# Save for inspection - ALWAYS use 'plot.png' as filename
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
print("Plot saved to plot.png")
plt.tight_layout()
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
41 changes: 41 additions & 0 deletions plots/plotly/scatter/scatter-basic/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
scatter-basic: Basic Scatter Plot
Library: plotly
"""

import numpy as np
import plotly.graph_objects as go


# Data: 100 seeded points, y follows x linearly with added Gaussian noise
np.random.seed(42)
num_points = 100
x = 10 + 2 * np.random.randn(num_points)
jitter = 2 * np.random.randn(num_points)
y = 0.8 * x + jitter

# Create figure
def _axis_layout(label):
    """Return the layout settings shared by both axes, titled with *label*."""
    return {
        "title": {"text": label, "font": {"size": 40}},
        "tickfont": {"size": 32},
        "showgrid": True,
        "gridcolor": "rgba(0, 0, 0, 0.1)",
    }


marker_style = {"size": 12, "color": "#306998", "opacity": 0.7}
points = go.Scatter(x=x, y=y, mode="markers", marker=marker_style)

fig = go.Figure()
fig.add_trace(points)

# Layout
fig.update_layout(
    title={"text": "Basic Scatter Plot", "font": {"size": 40}, "x": 0.5, "xanchor": "center"},
    xaxis=_axis_layout("X Value"),
    yaxis=_axis_layout("Y Value"),
    template="plotly_white",
    plot_bgcolor="white",
    margin={"l": 120, "r": 50, "t": 100, "b": 100},
)

# Save (4800 x 2700 px)
fig.write_image("plot.png", width=1600, height=900, scale=3)
Loading
Loading