|
| 1 | +/** |
| 2 | + * Validity-oracle tests for `Series.sortValues`. |
| 3 | + * |
| 4 | + * These tests are the correctness gate for the `tsb-perf-evolve` AlphaEvolve |
| 5 | + * program: every candidate that mutates the implementation in |
| 6 | + * `src/core/series.ts` must keep all of these tests green before its benchmark |
| 7 | + * timing is even considered. |
| 8 | + * |
| 9 | + * Behaviour is anchored to pandas' `Series.sort_values` semantics where the |
| 10 | + * spec is otherwise ambiguous (NaN ordering, stability, empty Series, etc.). |
| 11 | + */ |
| 12 | + |
| 13 | +import { describe, expect, it } from "bun:test"; |
| 14 | +import * as fc from "fast-check"; |
| 15 | +import { Dtype, Index, Series } from "../../src/index.ts"; |
| 16 | + |
| 17 | +// ─── helpers ────────────────────────────────────────────────────────────────── |
| 18 | + |
/**
 * Position-by-position array comparison in which a NaN on both sides counts
 * as a match (plain `===` would reject it).
 *
 * Exists so that NaN-placement assertions state their intent directly instead
 * of leaning on how a matcher such as `toEqual` happens to treat NaN.
 *
 * @param actual - Values produced by the code under test.
 * @param expected - Values the test expects, in order.
 * @returns `true` when both arrays have the same length and every position
 *   holds either two NaNs or two strictly equal (`===`) values.
 */
function arraysEqualWithNaN<T>(actual: readonly T[], expected: readonly T[]): boolean {
  const isNaNValue = (candidate: unknown): boolean =>
    typeof candidate === "number" && Number.isNaN(candidate);

  if (actual.length !== expected.length) {
    return false;
  }

  // `entries()` visits every position (holes included), matching an index loop.
  for (const [pos, want] of expected.entries()) {
    const got = actual[pos];
    const gotIsNaN = isNaNValue(got);
    const wantIsNaN = isNaNValue(want);

    if (gotIsNaN || wantIsNaN) {
      // NaN only ever matches another NaN.
      if (!(gotIsNaN && wantIsNaN)) {
        return false;
      }
      continue;
    }

    if (got !== want) {
      return false;
    }
  }
  return true;
}
| 45 | + |
| 46 | +// ─── numeric with NaN ───────────────────────────────────────────────────────── |
| 47 | + |
describe("Series.sortValues — numeric with NaN", () => {
  // Same input is reused so the tests document the relationship between the
  // four (ascending, naPosition) combinations.
  const data = [3, Number.NaN, 1, 2, Number.NaN];
  const labels = ["a", "b", "c", "d", "e"];

  it("ascending, naPosition='last' (default)", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues();
    expect(arraysEqualWithNaN(r.values, [1, 2, 3, Number.NaN, Number.NaN])).toBe(true);
    expect(r.index.toArray()).toEqual(["c", "d", "a", "b", "e"]);
  });

  it("ascending, naPosition='first'", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues(true, "first");
    expect(arraysEqualWithNaN(r.values, [Number.NaN, Number.NaN, 1, 2, 3])).toBe(true);
    expect(r.index.toArray()).toEqual(["b", "e", "c", "d", "a"]);
  });

  it("descending, naPosition='last'", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues(false, "last");
    expect(arraysEqualWithNaN(r.values, [3, 2, 1, Number.NaN, Number.NaN])).toBe(true);
    expect(r.index.toArray()).toEqual(["a", "d", "c", "b", "e"]);
  });

  it("descending, naPosition='first'", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues(false, "first");
    expect(arraysEqualWithNaN(r.values, [Number.NaN, Number.NaN, 3, 2, 1])).toBe(true);
    expect(r.index.toArray()).toEqual(["b", "e", "a", "d", "c"]);
  });

  it("preserves original indices in output (the value at position i kept its label)", () => {
    const s = new Series<number>({ data, index: labels });
    const r = s.sortValues();
    // Materialise the output labels once; the array does not change per row.
    const outLabels = r.index.toArray();
    // For every output position, the value must equal s.at(label_at_that_position).
    for (let i = 0; i < r.size; i++) {
      const lbl = outLabels[i] as string;
      const out = r.values[i] as number;
      const original = s.at(lbl);
      if (Number.isNaN(out)) {
        // NaN !== NaN, so a NaN output is matched by NaN-ness rather than toBe.
        expect(Number.isNaN(original)).toBe(true);
      } else {
        expect(out).toBe(original);
      }
    }
  });

  it("treats null and NaN identically as missing", () => {
    const s = new Series<number | null>({
      data: [3, null, 1, Number.NaN, 2],
      index: ["a", "b", "c", "d", "e"],
    });
    const r = s.sortValues();
    // Guard against a truncated result: without this the tail loop below
    // would pass vacuously on fewer than two trailing elements.
    expect(r.size).toBe(5);
    expect(r.values.slice(0, 3)).toEqual([1, 2, 3]);
    // The last two are both "missing" and must both appear at the end.
    const tail = r.values.slice(3);
    expect(tail.length).toBe(2);
    for (const v of tail) {
      expect(v === null || (typeof v === "number" && Number.isNaN(v))).toBe(true);
    }
    // Stable sort: the two missing rows keep their input order ("b" before
    // "d"), and every present value carries its originating label.
    expect(r.index.toArray()).toEqual(["c", "e", "a", "b", "d"]);
  });
});
| 113 | + |
| 114 | +// ─── string ────────────────────────────────────────────────────────────────── |
| 115 | + |
describe("Series.sortValues — string", () => {
  // Each factory call builds a fresh Series so no test shares state.
  const fruitSeries = () => new Series<string>({ data: ["banana", "apple", "cherry"] });
  const labelledWithNull = () =>
    new Series<string | null>({
      data: ["b", null, "a"],
      index: [10, 20, 30],
    });

  it("ascending lexicographic", () => {
    const sorted = fruitSeries().sortValues();
    expect(sorted.values).toEqual(["apple", "banana", "cherry"]);
  });

  it("descending lexicographic", () => {
    const sorted = fruitSeries().sortValues(false);
    expect(sorted.values).toEqual(["cherry", "banana", "apple"]);
  });

  it("places nulls last by default", () => {
    const sorted = labelledWithNull().sortValues();
    expect(sorted.values).toEqual(["a", "b", null]);
    // Labels travel with their values: "a" was at 30, "b" at 10, null at 20.
    expect(sorted.index.toArray()).toEqual([30, 10, 20]);
  });

  it("places nulls first when requested", () => {
    const sorted = labelledWithNull().sortValues(true, "first");
    expect(sorted.values).toEqual([null, "a", "b"]);
    expect(sorted.index.toArray()).toEqual([20, 30, 10]);
  });

  it("descending with nulls last", () => {
    const unlabelled = new Series<string | null>({ data: ["b", null, "a"] });
    const sorted = unlabelled.sortValues(false, "last");
    expect(sorted.values).toEqual(["b", "a", null]);
  });
});
| 152 | + |
| 153 | +// ─── mixed dtype (values + missing) ─────────────────────────────────────────── |
| 154 | + |
describe("Series.sortValues — mixed dtype with missing", () => {
  it("numeric series with all-NaN keeps stable order", () => {
    const rowLabels = ["a", "b", "c"];
    const allMissing = new Series<number>({
      data: [Number.NaN, Number.NaN, Number.NaN],
      index: rowLabels,
    });
    const sorted = allMissing.sortValues();
    // Nothing to reorder: same row count, labels in their input order.
    expect(sorted.size).toBe(3);
    expect(sorted.index.toArray()).toEqual(["a", "b", "c"]);
  });

  it("numeric series with no missing values returns sorted permutation", () => {
    const complete = new Series<number>({
      data: [5, -1, 0, 3.5, 2],
      index: ["a", "b", "c", "d", "e"],
    });
    const sorted = complete.sortValues();
    const sortedValues = sorted.values;
    const sortedLabels = sorted.index.toArray();
    expect(sortedValues).toEqual([-1, 0, 2, 3.5, 5]);
    expect(sortedLabels).toEqual(["b", "c", "e", "d", "a"]);
  });

  it("integer dtype is preserved across sort", () => {
    const ints = new Series<number>({ data: [3, 1, 2], dtype: Dtype.int64 });
    const sorted = ints.sortValues();
    expect(sorted.dtype).toBe(Dtype.int64);
  });
});
| 181 | + |
| 182 | +// ─── empty Series ───────────────────────────────────────────────────────────── |
| 183 | + |
describe("Series.sortValues — empty Series", () => {
  it("returns an empty Series with the same dtype and name", () => {
    const emptyPrices = new Series<number>({
      data: [],
      name: "price",
      dtype: Dtype.float64,
    });
    const sorted = emptyPrices.sortValues();
    // Shape first, then the metadata that must survive the sort.
    expect(sorted.size).toBe(0);
    expect(sorted.values).toEqual([]);
    expect(sorted.dtype).toBe(Dtype.float64);
    expect(sorted.name).toBe("price");
  });

  it("works with descending and naPosition options on an empty Series", () => {
    const emptyStrings = new Series<string>({ data: [], name: null });
    // (ascending, naPosition) combinations exercised, in this order.
    const optionCombos = [
      [false, "first"],
      [false, "last"],
      [true, "first"],
    ] as const;
    for (const [ascending, naPosition] of optionCombos) {
      expect(emptyStrings.sortValues(ascending, naPosition).size).toBe(0);
    }
  });
});
| 205 | + |
| 206 | +// ─── index alignment invariant ──────────────────────────────────────────────── |
| 207 | + |
describe("Series.sortValues — index alignment", () => {
  it("output index at every position is the *originating* row's label", () => {
    // Build a series whose labels are unrelated to positions so we can detect a
    // candidate that confuses "the sorted index array" with the originating
    // row's index.
    const s = new Series<number>({
      data: [40, 10, 30, 20],
      index: ["w", "x", "y", "z"],
    });
    const r = s.sortValues();
    // Sorted values are [10, 20, 30, 40]; the labels that originally held those
    // values are ["x", "z", "y", "w"].
    expect(r.values).toEqual([10, 20, 30, 40]);
    expect(r.index.toArray()).toEqual(["x", "z", "y", "w"]);
  });

  it("works with non-string (numeric) labels", () => {
    const s = new Series<number>({
      data: [3, 1, 2],
      index: new Index<number>([100, 200, 300]),
    });
    const r = s.sortValues();
    expect(r.values).toEqual([1, 2, 3]);
    expect(r.index.toArray()).toEqual([200, 300, 100]);
  });

  it("preserves the Series name", () => {
    const s = new Series<number>({ data: [3, 1, 2], name: "metric" });
    // Checked for both sort directions.
    expect(s.sortValues().name).toBe("metric");
    expect(s.sortValues(false).name).toBe("metric");
  });

  it("does not mutate the input Series", () => {
    const data = [3, 1, 2];
    const s = new Series<number>({ data, index: ["a", "b", "c"] });
    // Snapshot by copy. If `toArray()` hands back an internal array (not
    // verifiable from this file), an uncopied snapshot would alias it and an
    // in-place reorder would go unnoticed.
    const before = [...s.values];
    const beforeIdx = [...s.index.toArray()];
    s.sortValues();
    expect([...s.values]).toEqual(before);
    expect(s.index.toArray()).toEqual(beforeIdx);
    // The caller-owned array passed to the constructor must be untouched too.
    expect(data).toEqual([3, 1, 2]);
  });
});
| 250 | + |
| 251 | +// ─── public signature ───────────────────────────────────────────────────────── |
| 252 | + |
describe("Series.sortValues — public signature", () => {
  it("ascending defaults to true and naPosition defaults to 'last'", () => {
    const series = new Series<number | null>({ data: [2, null, 1] });
    // The same call with zero, one and two explicit arguments.
    const noArgs = series.sortValues();
    const ascendingOnly = series.sortValues(true);
    const fullySpecified = series.sortValues(true, "last");
    expect(noArgs.values).toEqual(ascendingOnly.values);
    expect(ascendingOnly.values).toEqual(fullySpecified.values);
  });

  it("returns a Series<T> (compile-time check via type assertion)", () => {
    // The explicit `Series<...>` annotations below are the actual check: a
    // changed return type would make `tsc --noEmit` reject the assignments.
    const numbers = new Series<number>({ data: [1, 2, 3] });
    const sortedNumbers: Series<number> = numbers.sortValues();
    expect(sortedNumbers.values).toEqual([1, 2, 3]);

    const strings = new Series<string>({ data: ["b", "a"] });
    const sortedStrings: Series<string> = strings.sortValues();
    expect(sortedStrings.values).toEqual(["a", "b"]);
  });
});
| 275 | + |
| 276 | +// ─── property-based checks ──────────────────────────────────────────────────── |
| 277 | + |
describe("Series.sortValues — property checks", () => {
  // Builds a fresh arbitrary per property: integer arrays with default bounds.
  const intArrays = () => fc.array(fc.integer());
  // Numeric ascending comparator used to normalise arrays before comparing.
  const numericAsc = (a: number, b: number): number => a - b;

  it("output length equals input length", () => {
    const lengthPreserved = fc.property(intArrays(), (input) => {
      const series = new Series<number>({ data: input });
      expect(series.sortValues().size).toBe(input.length);
    });
    fc.assert(lengthPreserved);
  });

  it("output is a permutation of the input (numeric, no NaN)", () => {
    const isPermutation = fc.property(intArrays(), (input) => {
      const series = new Series<number>({ data: input });
      // Sort copies of both sides identically; equal results imply equal multisets.
      const normalisedOutput = [...series.sortValues().values].sort(numericAsc);
      const normalisedInput = [...input].sort(numericAsc);
      expect(normalisedOutput).toEqual(normalisedInput);
    });
    fc.assert(isPermutation);
  });

  it("output is non-decreasing for ascending sort (numeric, no NaN)", () => {
    const nonDecreasing = fc.property(intArrays(), (input) => {
      const series = new Series<number>({ data: input });
      const sortedValues = series.sortValues().values;
      // Compare each element with its predecessor.
      for (let next = 1; next < sortedValues.length; next++) {
        const previous = sortedValues[next - 1] as number;
        expect(sortedValues[next] as number).toBeGreaterThanOrEqual(previous);
      }
    });
    fc.assert(nonDecreasing);
  });

  it("output is non-increasing for descending sort (numeric, no NaN)", () => {
    const nonIncreasing = fc.property(intArrays(), (input) => {
      const series = new Series<number>({ data: input });
      const sortedValues = series.sortValues(false).values;
      for (let next = 1; next < sortedValues.length; next++) {
        const previous = sortedValues[next - 1] as number;
        expect(sortedValues[next] as number).toBeLessThanOrEqual(previous);
      }
    });
    fc.assert(nonIncreasing);
  });

  it("applying sortValues twice is idempotent up to ties (the values match)", () => {
    const idempotent = fc.property(intArrays(), (input) => {
      const firstPass = new Series<number>({ data: input }).sortValues();
      const secondPass = firstPass.sortValues();
      // Only values are compared; labels of tied rows are not part of this check.
      expect([...secondPass.values]).toEqual([...firstPass.values]);
    });
    fc.assert(idempotent);
  });

  it("each output (label, value) pair is also an input (label, value) pair", () => {
    // Generates 1–50 integers, then exactly as many distinct labels in
    // [0, 10000], so every row has its own label.
    const labelledData = fc
      .array(fc.integer(), { minLength: 1, maxLength: 50 })
      .chain((data) => {
        const exactlyOnePerRow = { minLength: data.length, maxLength: data.length };
        return fc
          .uniqueArray(fc.integer({ min: 0, max: 10000 }), exactlyOnePerRow)
          .map((idx) => ({ data, idx }));
      });

    const pairsPreserved = fc.property(labelledData, ({ data, idx }) => {
      const series = new Series<number>({ data, index: idx });
      const sorted = series.sortValues();
      const outLabels = sorted.index.toArray();
      for (let pos = 0; pos < sorted.size; pos++) {
        // Look the output label up in the *input* series and compare values.
        const label = outLabels[pos] as number;
        expect(sorted.values[pos]).toBe(series.at(label));
      }
    });
    fc.assert(pairsPreserved);
  });

  it("sort is stable: equal values preserve original input order", () => {
    // A five-value range over up to 30 elements makes ties frequent.
    const tieHeavyArrays = fc.array(fc.integer({ min: 0, max: 4 }), {
      minLength: 0,
      maxLength: 30,
    });

    const stable = fc.property(tieHeavyArrays, (input) => {
      // No explicit index is supplied; the output labels are read as the
      // original positions. Adjacent equal values must therefore carry
      // strictly increasing labels.
      const series = new Series<number>({ data: input });
      const sorted = series.sortValues();
      const originalPositions = sorted.index.toArray() as number[];
      for (let next = 1; next < sorted.size; next++) {
        const tied = (sorted.values[next - 1] as number) === (sorted.values[next] as number);
        if (tied) {
          expect(originalPositions[next - 1] as number).toBeLessThan(
            originalPositions[next] as number,
          );
        }
      }
    });
    fc.assert(stable);
  });
});
0 commit comments