Skip to content

Commit 3096f8b

Browse files
authored
Merge pull request #324 from githubnext/autoloop/perf-comparison
[Autoloop: perf-comparison]
2 parents 6c03cb8 + 67a896a commit 3096f8b

4 files changed

Lines changed: 186 additions & 0 deletions

File tree

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Benchmark: pandas.array() — create typed arrays and access their elements.
3+
Outputs JSON: {"function": "pd_array", "mean_ms": ..., "iterations": ..., "total_ms": ...}
4+
"""
5+
import json
6+
import time
7+
import pandas as pd
8+
import numpy as np
9+
10+
SIZE = 10_000
WARMUP = 5
ITERATIONS = 100

# Fixture inputs are built once at module level, before any timing starts.
int_data = [*range(SIZE)]
float_data = [n * 0.5 for n in int_data]
string_data = [f"item_{n % 100}" for n in int_data]
# Every third slot (0, 3, 6, ...) is None; the rest keep their index value.
mixed_data = [n if n % 3 else None for n in int_data]
18+
19+
20+
def run():
    """Run one benchmark pass: build four pandas arrays, then read from each."""
    ints = pd.array(int_data, dtype="Int64")
    floats = pd.array(float_data, dtype="Float64")
    strings = pd.array(string_data, dtype="string")
    nullable_ints = pd.array(mixed_data, dtype="Int64")

    # One read per array; the values themselves are discarded.
    _ = (ints[-1], floats[0], len(strings), nullable_ints[0])
31+
32+
33+
# Untimed warm-up passes.
for _ in range(WARMUP):
    run()

# Timed passes; perf_counter() returns seconds, so scale to milliseconds.
began = time.perf_counter()
for _ in range(ITERATIONS):
    run()
total_ms = (time.perf_counter() - began) * 1000

report = {
    "function": "pd_array",
    "mean_ms": total_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total_ms,
}
print(json.dumps(report))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
Benchmark: DataFrame.to_dict / DataFrame.from_dict — dict orient conversions.
3+
Tests: to_dict under several orients on a 10k-row DataFrame, plus from_dict on small dict literals.
4+
Outputs JSON: {"function": "to_from_dict", "mean_ms": ..., "iterations": ..., "total_ms": ...}
5+
"""
6+
import time
7+
import json
8+
import pandas as pd
9+
10+
SIZE = 10_000
WARMUP = 5
ITERATIONS = 50

# Orients timed for DataFrame.to_dict. "tight" is included so this script does
# the same five conversions per pass as the TypeScript to_from_dict benchmark;
# without it the two reported timings are not comparable.
# NOTE: orient="tight" requires pandas >= 1.4.
TO_DICT_ORIENTS = ("list", "records", "split", "index", "tight")

df = pd.DataFrame({
    "a": list(range(SIZE)),
    "b": [i * 1.5 for i in range(SIZE)],
    "c": [f"str_{i % 100}" for i in range(SIZE)],
})

# Small inputs for DataFrame.from_dict (default orient and orient="index").
small_list = {"a": [1, 2, 3], "b": [4, 5, 6]}
small_index = {0: {"a": 1, "b": 4}, 1: {"a": 2, "b": 5}}


def run():
    """Run one benchmark pass: every to_dict orient on df, then both from_dict calls."""
    for orient in TO_DICT_ORIENTS:
        df.to_dict(orient=orient)
    pd.DataFrame.from_dict(small_list)
    pd.DataFrame.from_dict(small_index, orient="index")


# Untimed warm-up passes; they share run() with the timed loop so the two
# cannot drift apart.
for _ in range(WARMUP):
    run()

# Timed passes; perf_counter() returns seconds, so scale to milliseconds.
start = time.perf_counter()
for _ in range(ITERATIONS):
    run()
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "to_from_dict",
    "mean_ms": round(total / ITERATIONS, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total, 3),
}))

benchmarks/tsb/bench_pd_array.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/**
2+
* Benchmark: pdArray / PandasArray — create and iterate typed arrays.
3+
* Outputs JSON: {"function": "pd_array", "mean_ms": ..., "iterations": ..., "total_ms": ...}
4+
*/
5+
import { pdArray } from "../../src/index.js";
6+
7+
const SIZE = 10_000;
const WARMUP = 5;
const ITERATIONS = 100;

// Index sequence 0..SIZE-1; the remaining fixtures are derived from it.
const intData = Array.from({ length: SIZE }, (_unused, index) => index);
const floatData = intData.map((n) => n * 0.5);
const stringData = intData.map((n) => `item_${n % 100}`);
// Every third slot (0, 3, 6, ...) is null; the rest keep their index value.
const mixedData = intData.map((n) => (n % 3 === 0 ? null : n));
15+
16+
/**
 * Runs one benchmark pass: builds four arrays with pdArray, then performs a
 * single read on each. All results are discarded.
 */
function run(): void {
  const ints = pdArray(intData, "int64");
  const floats = pdArray(floatData, "float64");
  const strings = pdArray(stringData, "string");
  // No dtype argument is passed for the null-containing input.
  const withNulls = pdArray(mixedData);

  void ints.at(SIZE - 1);
  void floats.toArray();
  void strings.at(0);
  void withNulls.length;
}
28+
29+
// Untimed warm-up passes.
for (let pass = 0; pass < WARMUP; pass += 1) {
  run();
}

// Timed passes; performance.now() is already in milliseconds.
const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  run();
}
const elapsedMs = performance.now() - startedAt;

const report = {
  function: "pd_array",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* Benchmark: toDictOriented / fromDictOriented — DataFrame ↔ dict conversions.
3+
* Tests all orient variants: "list", "records", "split", "index", "tight".
4+
* Outputs JSON: {"function": "to_from_dict", "mean_ms": ..., "iterations": ..., "total_ms": ...}
5+
*/
6+
import { DataFrame, toDictOriented, fromDictOriented } from "../../src/index.js";
7+
8+
const SIZE = 10_000;
const WARMUP = 5;
const ITERATIONS = 50;

const df = new DataFrame({
  a: Array.from({ length: SIZE }, (_unused, row) => row),
  b: Array.from({ length: SIZE }, (_unused, row) => row * 1.5),
  c: Array.from({ length: SIZE }, (_unused, row) => `str_${row % 100}`),
});

/**
 * Runs one benchmark pass: the five toDictOriented orients on `df`, followed
 * by two fromDictOriented calls on small literals. The literals are created
 * inside the pass, so their allocation is part of what gets timed.
 */
function run(): void {
  toDictOriented(df, "list");
  toDictOriented(df, "records");
  toDictOriented(df, "split");
  toDictOriented(df, "index");
  toDictOriented(df, "tight");
  fromDictOriented({ a: [1, 2, 3], b: [4, 5, 6] });
  fromDictOriented({ 0: { a: 1, b: 4 }, 1: { a: 2, b: 5 } }, "index");
}

/** Rounds a millisecond value to three decimal places. */
const round3 = (ms: number): number => Math.round(ms * 1000) / 1000;

// Untimed warm-up passes.
for (let pass = 0; pass < WARMUP; pass += 1) {
  run();
}

// Timed passes; performance.now() is already in milliseconds.
const startedAt = performance.now();
for (let pass = 0; pass < ITERATIONS; pass += 1) {
  run();
}
const elapsedMs = performance.now() - startedAt;

console.log(
  JSON.stringify({
    function: "to_from_dict",
    mean_ms: round3(elapsedMs / ITERATIONS),
    iterations: ITERATIONS,
    total_ms: round3(elapsedMs),
  }),
);

0 commit comments

Comments
 (0)