-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add scatter-basic implementation (9 libraries) #510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8ae9db1
6b7480a
e8b2c96
2357d93
538ac43
977f88a
dd562dd
a9c9189
5dc0a6e
17012c2
f54418e
8810721
1565d81
99a55c3
97b5327
b8c6137
7c4e4f0
54689e5
897991c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| """ | ||
| scatter-basic: Basic Scatter Plot | ||
| Library: altair | ||
| """ | ||
|
|
||
| import altair as alt | ||
| import numpy as np | ||
| import pandas as pd | ||
|
|
||
|
|
||
| # Data | ||
| np.random.seed(42) | ||
| x = np.random.randn(100) * 2 + 10 | ||
| y = x * 0.8 + np.random.randn(100) * 2 | ||
|
|
||
| data = pd.DataFrame({"x": x, "y": y}) | ||
|
|
||
| # Create chart | ||
| chart = ( | ||
| alt.Chart(data) | ||
| .mark_point(filled=True, opacity=0.7, size=100, color="#306998") | ||
| .encode(x=alt.X("x:Q", title="X Value"), y=alt.Y("y:Q", title="Y Value"), tooltip=["x:Q", "y:Q"]) | ||
| .properties(width=1600, height=900, title="Basic Scatter Plot") | ||
| .configure_axis(labelFontSize=16, titleFontSize=20) | ||
| .configure_title(fontSize=20) | ||
| ) | ||
|
|
||
| # Save as PNG (1600 * 3 = 4800px, 900 * 3 = 2700px) | ||
| chart.save("plot.png", scale_factor=3.0) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| """ | ||
| scatter-basic: Basic Scatter Plot | ||
| Library: bokeh | ||
| """ | ||
|
|
||
| import numpy as np | ||
| from bokeh.io import export_png | ||
| from bokeh.models import ColumnDataSource | ||
| from bokeh.plotting import figure | ||
|
|
||
|
|
||
| # Data | ||
| np.random.seed(42) | ||
| x = np.random.randn(100) * 2 + 10 | ||
| y = x * 0.8 + np.random.randn(100) * 2 | ||
|
|
||
| source = ColumnDataSource(data={"x": x, "y": y}) | ||
|
|
||
| # Create figure (4800 × 2700 px for 16:9 aspect ratio) | ||
| p = figure(width=4800, height=2700, title="Basic Scatter Plot", x_axis_label="X Value", y_axis_label="Y Value") | ||
|
|
||
| # Plot scatter | ||
| p.scatter(x="x", y="y", source=source, size=12, color="#306998", alpha=0.7) | ||
|
|
||
| # Styling | ||
| p.title.text_font_size = "20pt" | ||
| p.xaxis.axis_label_text_font_size = "20pt" | ||
| p.yaxis.axis_label_text_font_size = "20pt" | ||
| p.xaxis.major_label_text_font_size = "16pt" | ||
| p.yaxis.major_label_text_font_size = "16pt" | ||
| p.grid.grid_line_alpha = 0.3 | ||
|
|
||
| # Save | ||
| export_png(p, filename="plot.png") |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,96 @@ | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| scatter-basic: Basic Scatter Plot | ||||||||||||||||||||
| Library: highcharts | ||||||||||||||||||||
| """ | ||||||||||||||||||||
|
|
||||||||||||||||||||
| import tempfile | ||||||||||||||||||||
| import time | ||||||||||||||||||||
| import urllib.request | ||||||||||||||||||||
| from pathlib import Path | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from highcharts_core.chart import Chart | ||||||||||||||||||||
| from highcharts_core.options import HighchartsOptions | ||||||||||||||||||||
| from highcharts_core.options.series.scatter import ScatterSeries | ||||||||||||||||||||
| from selenium import webdriver | ||||||||||||||||||||
| from selenium.webdriver.chrome.options import Options | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
| # Data | ||||||||||||||||||||
| x = [1, 2, 3, 4, 5, 6, 7, 8] | ||||||||||||||||||||
| y = [2.1, 4.3, 3.2, 5.8, 4.9, 7.2, 6.1, 8.5] | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Create chart with container | ||||||||||||||||||||
| chart = Chart(container="container") | ||||||||||||||||||||
| chart.options = HighchartsOptions() | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Chart configuration | ||||||||||||||||||||
| chart.options.chart = {"type": "scatter", "width": 4800, "height": 2700, "backgroundColor": "#ffffff"} | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Title | ||||||||||||||||||||
| chart.options.title = {"text": "Basic Scatter Plot", "style": {"fontSize": "60px"}} | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Axes | ||||||||||||||||||||
| chart.options.x_axis = { | ||||||||||||||||||||
| "title": {"text": "X Value", "style": {"fontSize": "48px"}}, | ||||||||||||||||||||
| "labels": {"style": {"fontSize": "40px"}}, | ||||||||||||||||||||
| "gridLineWidth": 1, | ||||||||||||||||||||
| "gridLineColor": "rgba(0, 0, 0, 0.1)", | ||||||||||||||||||||
| } | ||||||||||||||||||||
| chart.options.y_axis = { | ||||||||||||||||||||
| "title": {"text": "Y Value", "style": {"fontSize": "48px"}}, | ||||||||||||||||||||
| "labels": {"style": {"fontSize": "40px"}}, | ||||||||||||||||||||
| "gridLineWidth": 1, | ||||||||||||||||||||
| "gridLineColor": "rgba(0, 0, 0, 0.1)", | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Legend (disabled: not needed for a single series) | ||||||||||||||||||||
| chart.options.legend = {"enabled": False} | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Add series | ||||||||||||||||||||
| series = ScatterSeries() | ||||||||||||||||||||
| series.data = list(zip(x, y, strict=False)) | ||||||||||||||||||||
| series.name = "Data" | ||||||||||||||||||||
| series.marker = {"radius": 20, "fillColor": "#306998", "lineWidth": 2, "lineColor": "#306998"} | ||||||||||||||||||||
| chart.add_series(series) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| # Download Highcharts JS for inline embedding | ||||||||||||||||||||
| highcharts_url = "https://code.highcharts.com/highcharts.js" | ||||||||||||||||||||
| with urllib.request.urlopen(highcharts_url, timeout=30) as response: | ||||||||||||||||||||
| highcharts_js = response.read().decode("utf-8") | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
Comment on lines
+56
to
+60
|
||||||||||||||||||||
| # Download Highcharts JS for inline embedding | |
| highcharts_url = "https://code.highcharts.com/highcharts.js" | |
| with urllib.request.urlopen(highcharts_url, timeout=30) as response: | |
| highcharts_js = response.read().decode("utf-8") | |
| # Use bundled Highcharts JS for inline embedding to avoid runtime download and supply chain risk. | |
| # See: https://github.com/highcharts/highcharts for official source. | |
| highcharts_js_path = Path(__file__).parent.parent / "vendor" / "highcharts.js" | |
| with open(highcharts_js_path, "r", encoding="utf-8") as f: | |
| highcharts_js = f.read() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| """ | ||
| scatter-basic: Basic Scatter Plot | ||
| Library: letsplot | ||
| """ | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
| from lets_plot import LetsPlot, aes, element_text, geom_point, ggplot, ggsave, ggsize, labs, theme, theme_minimal | ||
|
|
||
|
|
||
| LetsPlot.setup_html() | ||
|
|
||
| # Data | ||
| np.random.seed(42) | ||
| x = np.random.randn(100) * 2 + 10 | ||
| y = x * 0.8 + np.random.randn(100) * 2 | ||
|
|
||
| data = pd.DataFrame({"x": x, "y": y}) | ||
|
|
||
| # Plot | ||
| plot = ( | ||
| ggplot(data, aes(x="x", y="y")) | ||
| + geom_point(color="#306998", size=4, alpha=0.7) | ||
| + labs(x="X Value", y="Y Value", title="Basic Scatter Plot") | ||
| + ggsize(1600, 900) | ||
| + theme_minimal() | ||
| + theme(plot_title=element_text(size=20), axis_title=element_text(size=20), axis_text=element_text(size=16)) | ||
| ) | ||
|
|
||
| # Save (scale 3x to get 4800 × 2700 px) | ||
| ggsave(plot, "plot.png", path=".", scale=3) |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,119 +1,27 @@ | ||||||
| """ | ||||||
| scatter-basic: Basic Scatter Plot | ||||||
| Implementation for: matplotlib | ||||||
| Variant: default | ||||||
| Python: 3.10+ | ||||||
| Library: matplotlib | ||||||
| """ | ||||||
|
|
||||||
| from typing import TYPE_CHECKING, Optional | ||||||
|
|
||||||
| import matplotlib.pyplot as plt | ||||||
| import numpy as np | ||||||
| import pandas as pd | ||||||
|
|
||||||
|
|
||||||
| if TYPE_CHECKING: | ||||||
| from matplotlib.figure import Figure | ||||||
|
|
||||||
|
|
||||||
| def create_plot( | ||||||
| data: pd.DataFrame, | ||||||
| x: str, | ||||||
| y: str, | ||||||
| figsize: tuple[float, float] = (16, 9), | ||||||
| alpha: float = 0.6, | ||||||
| size: float = 30, | ||||||
| color: str = "steelblue", | ||||||
| title: Optional[str] = None, | ||||||
| xlabel: Optional[str] = None, | ||||||
| ylabel: Optional[str] = None, | ||||||
| edgecolors: Optional[str] = None, | ||||||
| linewidth: float = 0, | ||||||
| **kwargs, | ||||||
| ) -> "Figure": | ||||||
| """ | ||||||
| Create a basic scatter plot visualizing the relationship between two continuous variables. | ||||||
|
|
||||||
| Args: | ||||||
| data: Input DataFrame with required columns | ||||||
| x: Column name for x-axis values | ||||||
| y: Column name for y-axis values | ||||||
| figsize: Figure size as (width, height) tuple (default: (16, 9)) | ||||||
| alpha: Transparency level for points (default: 0.6 for better visibility with many points) | ||||||
| size: Point size (default: 30) | ||||||
| color: Point color (default: "steelblue") | ||||||
| title: Plot title (default: None) | ||||||
| xlabel: X-axis label (default: uses column name) | ||||||
| ylabel: Y-axis label (default: uses column name) | ||||||
| edgecolors: Edge color for points (default: None) | ||||||
| linewidth: Width of edge lines (default: 0) | ||||||
| **kwargs: Additional parameters passed to scatter function | ||||||
|
|
||||||
| Returns: | ||||||
| Matplotlib Figure object | ||||||
|
|
||||||
| Raises: | ||||||
| ValueError: If data is empty | ||||||
| KeyError: If required columns not found | ||||||
| TypeError: If x or y columns contain non-numeric data | ||||||
|
|
||||||
| Example: | ||||||
| >>> data = pd.DataFrame({'x': [1, 2, 3], 'y': [2, 4, 3]}) | ||||||
| >>> fig = create_plot(data, 'x', 'y') | ||||||
| """ | ||||||
| # Input validation | ||||||
| if data.empty: | ||||||
| raise ValueError("Data cannot be empty") | ||||||
|
|
||||||
| # Check required columns | ||||||
| for col in [x, y]: | ||||||
| if col not in data.columns: | ||||||
| available = ", ".join(data.columns) | ||||||
| raise KeyError(f"Column '{col}' not found. Available columns: {available}") | ||||||
|
|
||||||
| # Check if columns are numeric | ||||||
| if not pd.api.types.is_numeric_dtype(data[x]): | ||||||
| raise TypeError(f"Column '{x}' must contain numeric data") | ||||||
| if not pd.api.types.is_numeric_dtype(data[y]): | ||||||
| raise TypeError(f"Column '{y}' must contain numeric data") | ||||||
|
|
||||||
| # Create figure | ||||||
| fig, ax = plt.subplots(figsize=figsize) | ||||||
|
|
||||||
| # Plot data | ||||||
| ax.scatter(data[x], data[y], s=size, alpha=alpha, c=color, edgecolors=edgecolors, linewidth=linewidth, **kwargs) | ||||||
|
|
||||||
| # Labels and title | ||||||
| ax.set_xlabel(xlabel or x) | ||||||
| ax.set_ylabel(ylabel or y) | ||||||
|
|
||||||
| if title: | ||||||
| ax.set_title(title) | ||||||
|
|
||||||
| # Apply styling | ||||||
| ax.grid(True, alpha=0.3) | ||||||
|
|
||||||
| # Layout | ||||||
| plt.tight_layout() | ||||||
|
|
||||||
| return fig | ||||||
|
|
||||||
|
|
||||||
| if __name__ == "__main__": | ||||||
| # Sample data for testing - many points to demonstrate basic scatter | ||||||
| np.random.seed(42) | ||||||
| n_points = 500 | ||||||
| # Data | ||||||
| np.random.seed(42) | ||||||
| x = np.random.randn(100) * 2 + 5 | ||||||
|
||||||
| x = np.random.randn(100) * 2 + 5 | |
| x = np.random.randn(100) * 2 + 10 |
Copilot
AI
Dec 7, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Data generation is inconsistent with other libraries. The y-value calculation uses `* 1.5` for the random component, while most other implementations use `* 2`. This creates different data distributions and visual appearances across libraries.
| y = x * 0.8 + np.random.randn(100) * 1.5 | |
| y = x * 0.8 + np.random.randn(100) * 2 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| """ | ||
| scatter-basic: Basic Scatter Plot | ||
| Library: plotly | ||
| """ | ||
|
|
||
| import numpy as np | ||
| import plotly.graph_objects as go | ||
|
|
||
|
|
||
| # Data | ||
| np.random.seed(42) | ||
| x = np.random.randn(100) * 2 + 10 | ||
| y = x * 0.8 + np.random.randn(100) * 2 | ||
|
|
||
| # Create figure | ||
| fig = go.Figure() | ||
|
|
||
| fig.add_trace(go.Scatter(x=x, y=y, mode="markers", marker={"size": 12, "color": "#306998", "opacity": 0.7})) | ||
|
|
||
| # Layout | ||
| fig.update_layout( | ||
| title={"text": "Basic Scatter Plot", "font": {"size": 40}, "x": 0.5, "xanchor": "center"}, | ||
| xaxis={ | ||
| "title": {"text": "X Value", "font": {"size": 40}}, | ||
| "tickfont": {"size": 32}, | ||
| "showgrid": True, | ||
| "gridcolor": "rgba(0, 0, 0, 0.1)", | ||
| }, | ||
| yaxis={ | ||
| "title": {"text": "Y Value", "font": {"size": 40}}, | ||
| "tickfont": {"size": 32}, | ||
| "showgrid": True, | ||
| "gridcolor": "rgba(0, 0, 0, 0.1)", | ||
| }, | ||
| template="plotly_white", | ||
| plot_bgcolor="white", | ||
| margin={"l": 120, "r": 50, "t": 100, "b": 100}, | ||
| ) | ||
|
|
||
| # Save (4800 x 2700 px) | ||
| fig.write_image("plot.png", width=1600, height=900, scale=3) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hard-coded data is inconsistent with all other implementations. Other libraries generate 100 random, correlated points, but this implementation uses only 8 manually specified points. This creates a significantly different visualization and does not properly demonstrate a scatter plot's utility for many data points. Use the same data generation pattern as the other libraries:
`np.random.seed(42)`, then `x = np.random.randn(100) * 2 + 10`, then `y = x * 0.8 + np.random.randn(100) * 2`.