Skip to content

Commit 46d46aa

Browse files
[Autoloop: perf-comparison] Iteration 342: Add 2 benchmark pairs (to_json_denormalize, cut_bins_to_frame)
Run: https://github.com/githubnext/tsb/actions/runs/26974794187 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 05090de commit 46d46aa

4 files changed

Lines changed: 175 additions & 0 deletions

File tree

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
"""Benchmark: cut_bins_to_frame — pd.cut with value_counts and bin summary on 100k rows."""
2+
import json, time
3+
import numpy as np
4+
import pandas as pd
5+
6+
SIZE = 100_000
NUM_BINS = 20
WARMUP = 5
ITERATIONS = 50


def make_data(size=SIZE):
    """Return `size` floats that cycle through 0.0, 0.1, ..., 99.9."""
    return np.array([(i % 1000) * 0.1 for i in range(size)])


def summarize_bins(cut_result, n_obs):
    """Build the per-bin summary, the counts dict and the edges frame.

    Parameters
    ----------
    cut_result : pandas.Categorical
        Result of ``pd.cut`` on a plain ndarray (interval categories).
    n_obs : int
        Number of observations that were binned; used for the frequency column.

    Returns
    -------
    tuple
        ``(summary, count_dict, edges)`` where ``summary`` has the columns
        bin / left / right / count / frequency, ``count_dict`` maps the bin
        label to its count, and ``edges`` has the columns left / right.
    """
    # Categorical.value_counts() only accepts `dropna`; passing `sort=False`
    # raises TypeError. It already returns one entry per category, in
    # category (i.e. bin) order, so no sort flag is needed.
    counts = cut_result.value_counts()

    # Each of the three results below recomputes what it needs on purpose:
    # they stand in for three independent tsb calls
    # (cutBinsToFrame, cutBinCounts, binEdges).
    summary = pd.DataFrame({
        "bin": counts.index.astype(str),
        "left": [iv.left for iv in counts.index],
        "right": [iv.right for iv in counts.index],
        "count": counts.values,
        "frequency": counts.values / n_obs,
    })
    count_dict = dict(zip(counts.index.astype(str), counts.values))
    edges = pd.DataFrame({
        "left": [iv.left for iv in counts.index],
        "right": [iv.right for iv in counts.index],
    })
    return summary, count_dict, edges


def run_benchmark(data, num_bins=NUM_BINS, warmup=WARMUP, iterations=ITERATIONS):
    """Time `summarize_bins` and return the result record as a dict.

    The binning itself (``pd.cut``) is done once, outside the timed loop,
    because the tsb counterpart of this benchmark also calls ``cut`` once
    before its warm-up and timed loops; timing it here only would skew the
    comparison.
    """
    cut_result = pd.cut(data, num_bins)
    n_obs = len(data)

    for _ in range(warmup):
        summarize_bins(cut_result, n_obs)

    start = time.perf_counter()
    for _ in range(iterations):
        summarize_bins(cut_result, n_obs)
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "cut_bins_to_frame",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


def main():
    """Run the benchmark with the module defaults and print one JSON line."""
    print(json.dumps(run_benchmark(make_data())))


if __name__ == "__main__":
    main()
Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
"""Benchmark: to_json_denormalize — json orient variants on 10k-row DataFrame."""
2+
import json, time
3+
import numpy as np
4+
import pandas as pd
5+
6+
ROWS = 10_000
WARMUP = 5
ITERATIONS = 30


def make_frame(rows=ROWS):
    """Build the benchmark DataFrame with flat, dotted column names.

    Columns: name, address.city, address.zip (strings) and score (float).
    """
    return pd.DataFrame({
        "name": [f"user_{i}" for i in range(rows)],
        "address.city": [f"city_{i % 100}" for i in range(rows)],
        "address.zip": [str(10000 + (i % 9000)) for i in range(rows)],
        "score": np.arange(rows) * 0.01,
    })


def denormalize_records(df, sep="."):
    """Return the rows of `df` as nested dicts.

    Each column name is split on `sep`; every part but the last becomes a
    nested dict level, e.g. ``address.city`` -> ``{"address": {"city": ...}}``.

    NOTE(review): this is the pandas stand-in for tsb's toJsonDenormalize;
    the nesting rule is inferred from the dotted column names used in this
    benchmark - confirm against the tsb implementation.
    """
    # Split the column names once, not once per row.
    paths = [(col, str(col).split(sep)) for col in df.columns]
    nested_rows = []
    for flat in df.to_dict("records"):
        nested = {}
        for col, parts in paths:
            node = nested
            for key in parts[:-1]:
                node = node.setdefault(key, {})
            node[parts[-1]] = flat[col]
        nested_rows.append(nested)
    return nested_rows


def run_once(df):
    """Run one pass of the four operations being timed."""
    # pandas equivalent of toJsonDenormalize: records, then rebuild nesting
    denormalize_records(df)
    # pandas equivalent of toJsonRecords
    df.to_json(orient="records")
    # pandas equivalent of toJsonSplit
    df.to_json(orient="split")
    # pandas equivalent of toJsonIndex
    df.to_json(orient="index")


def run_benchmark(df, warmup=WARMUP, iterations=ITERATIONS):
    """Warm up, time `iterations` passes of `run_once`, return the result record."""
    for _ in range(warmup):
        run_once(df)

    start = time.perf_counter()
    for _ in range(iterations):
        run_once(df)
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "to_json_denormalize",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


def main():
    """Run the benchmark with the module defaults and print one JSON line."""
    print(json.dumps(run_benchmark(make_frame())))


if __name__ == "__main__":
    main()
Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
/**
2+
* Benchmark: cut_bins_to_frame — cutBinsToFrame / cutBinCounts / binEdges on 100k data points.
3+
* Outputs JSON: {"function": "cut_bins_to_frame", "mean_ms": ..., "iterations": ..., "total_ms": ...}
4+
*/
5+
import { cut, cutBinsToFrame, cutBinCounts, binEdges } from "../../src/index.ts";
6+
7+
const SIZE = 100_000;
const NUM_BINS = 20;
const WARMUP = 5;
const ITERATIONS = 50;

// Input values cycle through 0.0, 0.1, ..., 99.9; binning happens once, up front.
const data = Array.from({ length: SIZE }, (_, i) => (i % 1000) * 0.1);
const binResult = cut(data, NUM_BINS);

/** One pass over the three functions being measured. */
function runOnce() {
  cutBinsToFrame(binResult, { data });
  cutBinCounts(binResult);
  binEdges(binResult);
}

// Untimed warm-up passes.
let warmupLeft = WARMUP;
while (warmupLeft > 0) {
  runOnce();
  warmupLeft -= 1;
}

// Timed passes.
const startedAt = performance.now();
let passesLeft = ITERATIONS;
while (passesLeft > 0) {
  runOnce();
  passesLeft -= 1;
}
const elapsedMs = performance.now() - startedAt;

// Single-line JSON result record on stdout.
const report = {
  function: "cut_bins_to_frame",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
/**
2+
* Benchmark: to_json_denormalize — toJsonDenormalize / toJsonRecords / toJsonSplit / toJsonIndex
3+
* Outputs JSON: {"function": "to_json_denormalize", "mean_ms": ..., "iterations": ..., "total_ms": ...}
4+
*/
5+
import { DataFrame, toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex } from "../../src/index.ts";
6+
7+
const ROWS = 10_000;
const WARMUP = 5;
const ITERATIONS = 30;

/** Build a ROWS-long plain array by calling `valueAt` with each row index. */
const buildColumn = (valueAt) => Array.from({ length: ROWS }, (_, i) => valueAt(i));

// Flat columns whose dotted names (address.city, address.zip) mimic a nested structure.
const df = DataFrame.fromColumns({
  "name": buildColumn((i) => `user_${i}`),
  "address.city": buildColumn((i) => `city_${i % 100}`),
  "address.zip": buildColumn((i) => `${10000 + (i % 9000)}`),
  "score": Float64Array.from({ length: ROWS }, (_, i) => i * 0.01),
});

/** One pass over the four serializers being measured. */
function runOnce() {
  toJsonDenormalize(df);
  toJsonRecords(df);
  toJsonSplit(df);
  toJsonIndex(df);
}

// Untimed warm-up passes.
let warmupLeft = WARMUP;
while (warmupLeft > 0) {
  runOnce();
  warmupLeft -= 1;
}

// Timed passes.
const startedAt = performance.now();
let passesLeft = ITERATIONS;
while (passesLeft > 0) {
  runOnce();
  passesLeft -= 1;
}
const elapsedMs = performance.now() - startedAt;

// Single-line JSON result record on stdout.
const report = {
  function: "to_json_denormalize",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));

0 commit comments

Comments
 (0)