Skip to content

Commit 6fa90f0

Browse files
Copilot and mrjf authored
Add validity-oracle tests for Series.sortValues
Agent-Logs-Url: https://github.com/githubnext/tsessebe/sessions/86f2e7e1-7b2a-4501-86ca-c97d0483b5c0 Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com>
1 parent f021893 commit 6fa90f0

1 file changed

Lines changed: 377 additions & 0 deletions

File tree

Lines changed: 377 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,377 @@
/**
 * Validity-oracle tests for `Series.sortValues`.
 *
 * These tests are the correctness gate for the `tsb-perf-evolve` AlphaEvolve
 * program: every candidate that mutates the implementation in
 * `src/core/series.ts` must keep all of these tests green before its benchmark
 * timing is even considered.
 *
 * Behaviour is anchored to pandas' `Series.sort_values` semantics where the
 * spec is otherwise ambiguous (NaN ordering, stability, empty Series, etc.).
 */
12+
13+
import { describe, expect, it } from "bun:test";
14+
import * as fc from "fast-check";
15+
import { Dtype, Index, Series } from "../../src/index.ts";
16+
17+
// ─── helpers ──────────────────────────────────────────────────────────────────
18+
/**
 * Element-wise equality that treats two NaNs as equal.
 *
 * Used to assert exact NaN positions in the output rather than relying on
 * `toEqual` matcher semantics, so the intent of each NaN-related assertion is
 * explicit.
 *
 * @param actual - Values produced by the code under test.
 * @param expected - Values the test expects, position by position.
 * @returns `true` when both arrays have the same length and every position
 *   either holds NaN on both sides or compares equal with `===`; otherwise
 *   `false`.
 */
function arraysEqualWithNaN<T>(actual: readonly T[], expected: readonly T[]): boolean {
  if (actual.length !== expected.length) {
    return false;
  }
  for (let i = 0; i < expected.length; i++) {
    const a = actual[i];
    const e = expected[i];
    const aNaN = typeof a === "number" && Number.isNaN(a);
    const eNaN = typeof e === "number" && Number.isNaN(e);
    if (aNaN || eNaN) {
      // NaN never satisfies `===`, so it is handled up front: a pair of NaNs
      // matches, a NaN against anything else does not.
      if (aNaN && eNaN) {
        continue;
      }
      return false;
    }
    if (a !== e) {
      return false;
    }
  }
  return true;
}
45+
46+
// ─── numeric with NaN ─────────────────────────────────────────────────────────
47+
// Numeric sorting with missing values: covers all four (ascending, naPosition)
// combinations on one shared input, label/value pairing, and null-vs-NaN handling.
describe("Series.sortValues — numeric with NaN", () => {
  // Same input is reused so the tests document the relationship between the
  // four (ascending, naPosition) combinations.
  const data = [3, Number.NaN, 1, 2, Number.NaN];
  const labels = ["a", "b", "c", "d", "e"];

  it("ascending, naPosition='last' (default)", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues();
    expect(arraysEqualWithNaN(r.values, [1, 2, 3, Number.NaN, Number.NaN])).toBe(true);
    expect(r.index.toArray()).toEqual(["c", "d", "a", "b", "e"]);
  });

  it("ascending, naPosition='first'", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues(true, "first");
    expect(arraysEqualWithNaN(r.values, [Number.NaN, Number.NaN, 1, 2, 3])).toBe(true);
    expect(r.index.toArray()).toEqual(["b", "e", "c", "d", "a"]);
  });

  it("descending, naPosition='last'", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues(false, "last");
    expect(arraysEqualWithNaN(r.values, [3, 2, 1, Number.NaN, Number.NaN])).toBe(true);
    expect(r.index.toArray()).toEqual(["a", "d", "c", "b", "e"]);
  });

  it("descending, naPosition='first'", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues(false, "first");
    expect(arraysEqualWithNaN(r.values, [Number.NaN, Number.NaN, 3, 2, 1])).toBe(true);
    expect(r.index.toArray()).toEqual(["b", "e", "a", "d", "c"]);
  });

  it("preserves original indices in output (the value at position i kept its label)", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues();
    // Materialise the output labels once instead of on every loop iteration.
    const outLabels = r.index.toArray();
    // For every output position, the value must equal s.at(label_at_that_position).
    for (let i = 0; i < r.size; i++) {
      const lbl = outLabels[i] as string;
      const out = r.values[i] as number;
      const original = s.at(lbl);
      if (Number.isNaN(out)) {
        // NaN never equals itself, so missing values are compared by NaN-ness.
        expect(Number.isNaN(original)).toBe(true);
      } else {
        expect(out).toBe(original);
      }
    }
  });

  it("treats null and NaN identically as missing", () => {
    const s = new Series<number | null>({
      data: [3, null, 1, Number.NaN, 2],
      index: ["a", "b", "c", "d", "e"],
    });
    const r = s.sortValues();
    expect(r.values.slice(0, 3)).toEqual([1, 2, 3]);
    // The last two are both "missing" — order between them is the input order
    // (stable sort), but they must both appear at the end.
    const tail = r.values.slice(3);
    // Guard against a vacuous pass: the loop below asserts nothing if the
    // tail is empty or short.
    expect(tail).toHaveLength(2);
    for (const v of tail) {
      expect(v === null || (typeof v === "number" && Number.isNaN(v))).toBe(true);
    }
  });
});
113+
114+
// ─── string ──────────────────────────────────────────────────────────────────
115+
// String sorting: lexicographic order in both directions, plus placement of
// null entries (and their labels) for each naPosition.
describe("Series.sortValues — string", () => {
  it("ascending lexicographic", () => {
    const s = new Series<string>({ data: ["banana", "apple", "cherry"] });
    expect(s.sortValues().values).toEqual(["apple", "banana", "cherry"]);
  });

  it("descending lexicographic", () => {
    const s = new Series<string>({ data: ["banana", "apple", "cherry"] });
    expect(s.sortValues(false).values).toEqual(["cherry", "banana", "apple"]);
  });

  it("places nulls last by default", () => {
    const s = new Series<string | null>({
      data: ["b", null, "a"],
      index: [10, 20, 30],
    });
    const r = s.sortValues();
    expect(r.values).toEqual(["a", "b", null]);
    // Labels travel with their values: "a" came from 30, "b" from 10, null from 20.
    expect(r.index.toArray()).toEqual([30, 10, 20]);
  });

  it("places nulls first when requested", () => {
    const s = new Series<string | null>({
      data: ["b", null, "a"],
      index: [10, 20, 30],
    });
    const r = s.sortValues(true, "first");
    expect(r.values).toEqual([null, "a", "b"]);
    expect(r.index.toArray()).toEqual([20, 30, 10]);
  });

  it("descending with nulls last", () => {
    const s = new Series<string | null>({ data: ["b", null, "a"] });
    expect(s.sortValues(false, "last").values).toEqual(["b", "a", null]);
  });
});
152+
153+
// ─── mixed dtype (values + missing) ───────────────────────────────────────────
154+
// Edge cases around missing values and dtype: an all-NaN series, a series with
// no missing values at all, and dtype preservation for an int64 series.
describe("Series.sortValues — mixed dtype with missing", () => {
  it("numeric series with all-NaN keeps stable order", () => {
    const s = new Series<number>({
      data: [Number.NaN, Number.NaN, Number.NaN],
      index: ["a", "b", "c"],
    });
    const r = s.sortValues();
    expect(r.size).toBe(3);
    // Every output value must still be NaN; checking only size and labels
    // would accept an implementation that rewrote the values.
    expect(arraysEqualWithNaN(r.values, [Number.NaN, Number.NaN, Number.NaN])).toBe(true);
    expect(r.index.toArray()).toEqual(["a", "b", "c"]);
  });

  it("numeric series with no missing values returns sorted permutation", () => {
    const s = new Series<number>({
      data: [5, -1, 0, 3.5, 2],
      index: ["a", "b", "c", "d", "e"],
    });
    const r = s.sortValues();
    expect(r.values).toEqual([-1, 0, 2, 3.5, 5]);
    expect(r.index.toArray()).toEqual(["b", "c", "e", "d", "a"]);
  });

  it("integer dtype is preserved across sort", () => {
    const s = new Series<number>({ data: [3, 1, 2], dtype: Dtype.int64 });
    expect(s.sortValues().dtype).toBe(Dtype.int64);
  });
});
181+
182+
// ─── empty Series ─────────────────────────────────────────────────────────────
183+
// Empty input: the result must be empty, keep dtype and name, and accept every
// (ascending, naPosition) combination.
describe("Series.sortValues — empty Series", () => {
  it("returns an empty Series with the same dtype and name", () => {
    const s = new Series<number>({
      data: [],
      name: "price",
      dtype: Dtype.float64,
    });
    const r = s.sortValues();
    expect(r.size).toBe(0);
    expect(r.values).toEqual([]);
    expect(r.dtype).toBe(Dtype.float64);
    expect(r.name).toBe("price");
  });

  it("works with descending and naPosition options on an empty Series", () => {
    const s = new Series<string>({ data: [], name: null });
    // All four explicit (ascending, naPosition) combinations.
    expect(s.sortValues(false, "first").size).toBe(0);
    expect(s.sortValues(false, "last").size).toBe(0);
    expect(s.sortValues(true, "first").size).toBe(0);
    expect(s.sortValues(true, "last").size).toBe(0);
  });
});
205+
206+
// ─── index alignment invariant ────────────────────────────────────────────────
207+
// Index alignment: each output label must be the label of the row the value
// came from, the Series name must survive, and the input must stay untouched.
describe("Series.sortValues — index alignment", () => {
  it("output index at every position is the *originating* row's label", () => {
    // Build a series whose labels are unrelated to positions so we can detect a
    // candidate that confuses "the sorted index array" with the originating
    // row's index.
    const s = new Series<number>({
      data: [40, 10, 30, 20],
      index: ["w", "x", "y", "z"],
    });
    const r = s.sortValues();
    // Sorted values are [10, 20, 30, 40]; the labels that originally held those
    // values are ["x", "z", "y", "w"].
    expect(r.values).toEqual([10, 20, 30, 40]);
    expect(r.index.toArray()).toEqual(["x", "z", "y", "w"]);
  });

  it("works with non-string (numeric) labels", () => {
    const s = new Series<number>({
      data: [3, 1, 2],
      index: new Index<number>([100, 200, 300]),
    });
    const r = s.sortValues();
    expect(r.values).toEqual([1, 2, 3]);
    expect(r.index.toArray()).toEqual([200, 300, 100]);
  });

  it("preserves the Series name", () => {
    const s = new Series<number>({ data: [3, 1, 2], name: "metric" });
    expect(s.sortValues().name).toBe("metric");
    expect(s.sortValues(false).name).toBe("metric");
  });

  it("does not mutate the input Series", () => {
    const data = [3, 1, 2];
    const s = new Series<number>({ data, index: ["a", "b", "c"] });
    // Snapshot copies, not references: if `toArray()` hands back an array that
    // aliases internal storage, an in-place sort would change the "before"
    // snapshot too and the comparison below could never fail.
    const before = [...s.values];
    const beforeIdx = [...s.index.toArray()];
    s.sortValues();
    expect([...s.values]).toEqual(before);
    expect(s.index.toArray()).toEqual(beforeIdx);
  });
});
250+
251+
// ─── public signature ─────────────────────────────────────────────────────────
252+
// Public signature: default arguments behave like the explicit
// (true, "last") call, and the declared return type is `Series<T>`.
describe("Series.sortValues — public signature", () => {
  it("ascending defaults to true and naPosition defaults to 'last'", () => {
    const s = new Series<number | null>({ data: [2, null, 1] });
    const a = s.sortValues();
    const b = s.sortValues(true);
    const c = s.sortValues(true, "last");
    expect(a.values).toEqual(b.values);
    expect(b.values).toEqual(c.values);
    // The three calls must agree on label order too, not just on values.
    expect(a.index.toArray()).toEqual(b.index.toArray());
    expect(b.index.toArray()).toEqual(c.index.toArray());
  });

  it("returns a Series<T> (compile-time check via type assertion)", () => {
    const s = new Series<number>({ data: [1, 2, 3] });
    // The next line is a compile-time check: if the signature changes the
    // assigned type would mismatch and `tsc --noEmit` would fail.
    const r: Series<number> = s.sortValues();
    expect(r.values).toEqual([1, 2, 3]);

    const ss = new Series<string>({ data: ["b", "a"] });
    const rs: Series<string> = ss.sortValues();
    expect(rs.values).toEqual(["a", "b"]);
  });
});
275+
276+
// ─── property-based checks ────────────────────────────────────────────────────
277+
// Property-based checks (fast-check): length, permutation, ordering in both
// directions, idempotence, label/value pairing, and stability on integer inputs.
describe("Series.sortValues — property checks", () => {
  // Numeric ascending comparator used to build the reference ordering.
  const byValue = (a: number, b: number): number => a - b;

  it("output length equals input length", () => {
    fc.assert(
      fc.property(fc.array(fc.integer()), (arr) => {
        const s = new Series<number>({ data: arr });
        expect(s.sortValues().size).toBe(arr.length);
      }),
    );
  });

  it("output is a permutation of the input (numeric, no NaN)", () => {
    fc.assert(
      fc.property(fc.array(fc.integer()), (arr) => {
        const s = new Series<number>({ data: arr });
        // Both sides are copied before sorting so neither the Series' values
        // nor the generated input array is mutated.
        const actual = [...s.sortValues().values].sort(byValue);
        const expected = [...arr].sort(byValue);
        expect(actual).toEqual(expected);
      }),
    );
  });

  it("output is non-decreasing for ascending sort (numeric, no NaN)", () => {
    fc.assert(
      fc.property(fc.array(fc.integer()), (arr) => {
        const s = new Series<number>({ data: arr });
        const out = s.sortValues().values;
        for (let i = 1; i < out.length; i++) {
          expect(out[i] as number).toBeGreaterThanOrEqual(out[i - 1] as number);
        }
      }),
    );
  });

  it("output is non-increasing for descending sort (numeric, no NaN)", () => {
    fc.assert(
      fc.property(fc.array(fc.integer()), (arr) => {
        const s = new Series<number>({ data: arr });
        const out = s.sortValues(false).values;
        for (let i = 1; i < out.length; i++) {
          expect(out[i] as number).toBeLessThanOrEqual(out[i - 1] as number);
        }
      }),
    );
  });

  it("applying sortValues twice is idempotent up to ties (the values match)", () => {
    fc.assert(
      fc.property(fc.array(fc.integer()), (arr) => {
        const s = new Series<number>({ data: arr });
        const once = s.sortValues();
        const twice = once.sortValues();
        expect([...twice.values]).toEqual([...once.values]);
      }),
    );
  });

  it("each output (label, value) pair is also an input (label, value) pair", () => {
    fc.assert(
      fc.property(
        // Generate the data first, then exactly as many unique labels as there
        // are data points, so every row has a distinct label.
        fc.array(fc.integer(), { minLength: 1, maxLength: 50 }).chain((data) =>
          fc
            .uniqueArray(fc.integer({ min: 0, max: 10000 }), {
              minLength: data.length,
              maxLength: data.length,
            })
            .map((idx) => ({ data, idx })),
        ),
        ({ data, idx }) => {
          const s = new Series<number>({ data, index: idx });
          const r = s.sortValues();
          const labels = r.index.toArray();
          for (let i = 0; i < r.size; i++) {
            const lbl = labels[i] as number;
            expect(r.values[i]).toBe(s.at(lbl));
          }
        },
      ),
    );
  });

  it("sort is stable: equal values preserve original input order", () => {
    fc.assert(
      fc.property(
        // A narrow value range (0..4) makes ties likely.
        fc.array(fc.integer({ min: 0, max: 4 }), { minLength: 0, maxLength: 30 }),
        (arr) => {
          // Tag each element with its original position; two elements with the
          // same value must appear in the output with their original positions
          // in increasing order.
          const s = new Series<number>({ data: arr });
          const r = s.sortValues();
          // No explicit index is passed, so the output labels are read as the
          // original positions.
          const positions = r.index.toArray() as number[];
          for (let i = 1; i < r.size; i++) {
            if ((r.values[i - 1] as number) === (r.values[i] as number)) {
              expect(positions[i - 1] as number).toBeLessThan(positions[i] as number);
            }
          }
        },
      ),
    );
  });
});

0 commit comments

Comments
 (0)