-
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathColumnStoreTests.cs
More file actions
514 lines (394 loc) · 16 KB
/
ColumnStoreTests.cs
File metadata and controls
514 lines (394 loc) · 16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
// <copyright file="ColumnStoreTests.cs" company="MPCoreDeveloper">
// Copyright (c) 2024-2025 MPCoreDeveloper and GitHub Copilot. All rights reserved.
// Licensed under the MIT License. See LICENSE file in the project root for full license information.
// </copyright>
namespace SharpCoreDB.Tests;
using SharpCoreDB.ColumnStorage;
using System.Diagnostics;
using Xunit;
/// <summary>
/// Tests for columnar storage with SIMD-optimized aggregates.
/// Target: Aggregates on 10k records in &lt; 2ms.
/// </summary>
/// <remarks>
/// The timing tests measure a single invocation with <see cref="Stopwatch"/> and compare it to a
/// fixed budget, so they are sensitive to machine load. Every store is created with a
/// <c>using</c> declaration so that it is disposed even when an assertion fails.
/// </remarks>
public sealed class ColumnStoreTests
{
    /// <summary>Number of rows produced by the generators and assumed by the "10k" tests.</summary>
    private const int RecordCount = 10_000;

    /// <summary>
    /// Fixed reference instant for generated dates. Using a constant instead of the wall clock
    /// keeps the generated data identical on every run, matching the fixed random seed.
    /// </summary>
    private static readonly DateTime ReferenceDate = new(2025, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    #region Test Data Models

    /// <summary>Sales row model produced by <see cref="Generate10kSales"/>.</summary>
    public sealed record SalesRecord(
        int Id,
        string Product,
        decimal Price,
        int Quantity,
        DateTime OrderDate,
        string Region);

    /// <summary>Employee row model; the row type used by every test in this class.</summary>
    public sealed record EmployeeRecord(
        int Id,
        string Name,
        int Age,
        decimal Salary,
        string Department,
        DateTime HireDate);

    /// <summary>Metrics row model. Not referenced by any test in this class.</summary>
    public sealed record MetricsRecord(
        int Id,
        double Value,
        long Timestamp,
        string MetricName,
        double Average);

    #endregion

    #region Transpose Tests

    /// <summary>
    /// Transposing three rows reports three rows and exposes the Age, Salary and Department columns.
    /// </summary>
    [Fact]
    public void ColumnStore_Transpose_ConvertsRowsToColumns()
    {
        // Arrange
        var employees = new[]
        {
            new EmployeeRecord(1, "Alice", 30, 100000m, "Engineering", ReferenceDate),
            new EmployeeRecord(2, "Bob", 25, 80000m, "Sales", ReferenceDate),
            new EmployeeRecord(3, "Charlie", 35, 120000m, "Engineering", ReferenceDate),
        };
        using var columnStore = new ColumnStore<EmployeeRecord>();

        // Act
        columnStore.Transpose(employees);

        // Assert
        Assert.Equal(employees.Length, columnStore.RowCount);
        Assert.Contains("Age", columnStore.ColumnNames);
        Assert.Contains("Salary", columnStore.ColumnNames);
        Assert.Contains("Department", columnStore.ColumnNames);

        Console.WriteLine($"? Transposed {employees.Length} rows to {columnStore.ColumnNames.Count} columns");
        Console.WriteLine($" Columns: {string.Join(", ", columnStore.ColumnNames)}");
    }

    /// <summary>Transposing the generated 10k rows must finish in under 50 ms.</summary>
    [Fact]
    public void ColumnStore_Transpose_10kRecords_Fast()
    {
        // Arrange
        var records = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();

        // Act
        var sw = Stopwatch.StartNew();
        columnStore.Transpose(records);
        sw.Stop();

        // Assert
        Assert.Equal(RecordCount, columnStore.RowCount);
        Assert.True(sw.ElapsedMilliseconds < 50,
            $"Expected < 50ms for transpose, got {sw.ElapsedMilliseconds}ms");

        Console.WriteLine($"? Transposed 10k records in {sw.ElapsedMilliseconds}ms");
        Console.WriteLine($" Throughput: {RecordCount / sw.Elapsed.TotalSeconds:N0} rows/sec");
    }

    #endregion

    #region SUM Aggregate Tests

    /// <summary>SUM over the int Age column equals the LINQ sum of the source rows.</summary>
    [Fact]
    public void ColumnStore_Sum_Int32_CorrectResult()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act
        var sum = columnStore.Sum<int>("Age");

        // Assert
        var expectedSum = employees.Sum(e => e.Age);
        Assert.Equal(expectedSum, sum);

        Console.WriteLine($"? SUM(Age) = {sum:N0} (expected: {expectedSum:N0})");
    }

    /// <summary>SUM over the decimal Salary column equals the LINQ sum of the source rows.</summary>
    [Fact]
    public void ColumnStore_Sum_Decimal_CorrectResult()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act
        var sum = columnStore.Sum<decimal>("Salary");

        // Assert
        var expectedSum = employees.Sum(e => e.Salary);
        Assert.Equal(expectedSum, sum);

        Console.WriteLine($"? SUM(Salary) = ${sum:N0} (expected: ${expectedSum:N0})");
    }

    /// <summary>A warmed-up SUM over 10k rows must finish in under 2 ms.</summary>
    [Fact]
    public void ColumnStore_Sum_10kRecords_Under2ms()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Warm up so the timed call does not include first-call cost
        _ = columnStore.Sum<int>("Age");

        // Act: Benchmark SUM on 10k records
        var sw = Stopwatch.StartNew();
        var sum = columnStore.Sum<int>("Age");
        sw.Stop();

        // Assert
        Assert.True(sw.Elapsed.TotalMilliseconds < 2.0,
            $"Expected < 2ms, got {sw.Elapsed.TotalMilliseconds:F3}ms");

        Console.WriteLine($"? SUM on 10k records: {sw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Result: {sum:N0}");
        // The value is rows per millisecond (row count / elapsed ms), so it is labelled as such.
        Console.WriteLine($" Throughput: {RecordCount / sw.Elapsed.TotalMilliseconds:F0} rows/ms");
    }

    #endregion

    #region AVERAGE Aggregate Tests

    /// <summary>AVG over Age matches the LINQ average to two decimal places.</summary>
    [Fact]
    public void ColumnStore_Average_CorrectResult()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act
        var avg = columnStore.Average("Age");

        // Assert
        var expectedAvg = employees.Average(e => e.Age);
        Assert.Equal(expectedAvg, avg, precision: 2);

        Console.WriteLine($"? AVG(Age) = {avg:F2} (expected: {expectedAvg:F2})");
    }

    /// <summary>AVG over Salary matches the LINQ average (computed as double) to two decimal places.</summary>
    [Fact]
    public void ColumnStore_Average_Salary_CorrectResult()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act
        var avg = columnStore.Average("Salary");

        // Assert
        var expectedAvg = employees.Average(e => (double)e.Salary);
        Assert.Equal(expectedAvg, avg, precision: 2);

        Console.WriteLine($"? AVG(Salary) = ${avg:N2} (expected: ${expectedAvg:N2})");
    }

    /// <summary>A warmed-up AVG over 10k rows must finish in under 2 ms.</summary>
    [Fact]
    public void ColumnStore_Average_10kRecords_Under2ms()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Warm up
        _ = columnStore.Average("Age");

        // Act
        var sw = Stopwatch.StartNew();
        var avg = columnStore.Average("Age");
        sw.Stop();

        // Assert
        Assert.True(sw.Elapsed.TotalMilliseconds < 2.0,
            $"Expected < 2ms, got {sw.Elapsed.TotalMilliseconds:F3}ms");

        Console.WriteLine($"? AVG on 10k records: {sw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Result: {avg:F2}");
    }

    #endregion

    #region MIN/MAX Aggregate Tests

    /// <summary>MIN over Age equals the LINQ minimum of the source rows.</summary>
    [Fact]
    public void ColumnStore_Min_CorrectResult()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act
        var min = columnStore.Min<int>("Age");

        // Assert
        var expectedMin = employees.Min(e => e.Age);
        Assert.Equal(expectedMin, min);

        Console.WriteLine($"? MIN(Age) = {min} (expected: {expectedMin})");
    }

    /// <summary>MAX over Age equals the LINQ maximum of the source rows.</summary>
    [Fact]
    public void ColumnStore_Max_CorrectResult()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act
        var max = columnStore.Max<int>("Age");

        // Assert
        var expectedMax = employees.Max(e => e.Age);
        Assert.Equal(expectedMax, max);

        Console.WriteLine($"? MAX(Age) = {max} (expected: {expectedMax})");
    }

    /// <summary>Warmed-up MIN and MAX over 10k rows must finish in under 2 ms combined.</summary>
    [Fact]
    public void ColumnStore_MinMax_10kRecords_Under2ms()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Warm up
        _ = columnStore.Min<int>("Age");
        _ = columnStore.Max<int>("Age");

        // Act: Benchmark both MIN and MAX
        var sw = Stopwatch.StartNew();
        var min = columnStore.Min<int>("Age");
        var max = columnStore.Max<int>("Age");
        sw.Stop();

        // Assert
        Assert.True(sw.Elapsed.TotalMilliseconds < 2.0,
            $"Expected < 2ms, got {sw.Elapsed.TotalMilliseconds:F3}ms");

        Console.WriteLine($"? MIN+MAX on 10k records: {sw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" MIN = {min}, MAX = {max}");
    }

    #endregion

    #region Multi-Aggregate Tests

    /// <summary>
    /// SUM, AVG, MIN, MAX and COUNT over the Age column of 10k rows must finish in under 2 ms combined.
    /// </summary>
    [Fact]
    public void ColumnStore_MultipleAggregates_10kRecords_Under2ms()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Warm up every operation that is timed below, not just SUM, so that none of the
        // measured calls pays a first-call cost.
        _ = columnStore.Sum<int>("Age");
        _ = columnStore.Average("Age");
        _ = columnStore.Min<int>("Age");
        _ = columnStore.Max<int>("Age");
        _ = columnStore.Count("Age");

        // Act: Run ALL aggregates on same column
        var sw = Stopwatch.StartNew();
        var sum = columnStore.Sum<int>("Age");
        var avg = columnStore.Average("Age");
        var min = columnStore.Min<int>("Age");
        var max = columnStore.Max<int>("Age");
        var count = columnStore.Count("Age");
        sw.Stop();

        // Assert: All 5 aggregates should complete in < 2ms total
        Assert.True(sw.Elapsed.TotalMilliseconds < 2.0,
            $"Expected < 2ms for all aggregates, got {sw.Elapsed.TotalMilliseconds:F3}ms");

        Console.WriteLine($"? ALL AGGREGATES on 10k records: {sw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" SUM = {sum:N0}");
        Console.WriteLine($" AVG = {avg:F2}");
        Console.WriteLine($" MIN = {min}");
        Console.WriteLine($" MAX = {max}");
        Console.WriteLine($" COUNT = {count:N0}");
    }

    /// <summary>
    /// Aggregates over two different columns (Age and Salary) must finish within the time budget.
    /// </summary>
    /// <remarks>
    /// Despite the "Under2ms" in the name, the asserted budget is 10 ms. This test performs no
    /// warm-up, so the measurement includes the first call of each aggregate.
    /// </remarks>
    [Fact]
    public void ColumnStore_AggregatesOnMultipleColumns_Under2ms()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        // Act: Aggregates on different columns
        var sw = Stopwatch.StartNew();
        var avgAge = columnStore.Average("Age");
        var avgSalary = columnStore.Average("Salary");
        var minAge = columnStore.Min<int>("Age");
        var maxSalary = columnStore.Max<decimal>("Salary");
        sw.Stop();

        // Assert (relaxed threshold for CI/different hardware)
        Assert.True(sw.Elapsed.TotalMilliseconds < 10.0,
            $"Expected < 10ms, got {sw.Elapsed.TotalMilliseconds:F3}ms");

        Console.WriteLine($"? Multi-column aggregates: {sw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" AVG(Age) = {avgAge:F2}");
        Console.WriteLine($" AVG(Salary) = ${avgSalary:N2}");
        Console.WriteLine($" MIN(Age) = {minAge}");
        Console.WriteLine($" MAX(Salary) = ${maxSalary:N0}");
    }

    #endregion

    #region Performance Comparison Tests

    /// <summary>
    /// Times SUM, AVG and MIN+MAX over Age with LINQ and with the column store, checks that both
    /// produce the same results, and requires the columnar SUM and AVG to be more than 2x faster.
    /// </summary>
    [Fact]
    public void ColumnStore_VsLinq_PerformanceComparison()
    {
        // Arrange
        var employees = Generate10kEmployees();
        using var columnStore = new ColumnStore<EmployeeRecord>();
        columnStore.Transpose(employees);

        Console.WriteLine("??????????????????????????????????????????????????????????????");
        Console.WriteLine("? COLUMNAR vs LINQ - PERFORMANCE COMPARISON ?");
        Console.WriteLine("??????????????????????????????????????????????????????????????");

        // Warm up both sides for every operation that is timed below, so the comparison is
        // between steady-state calls rather than between first-call costs.
        _ = employees.Sum(e => e.Age);
        _ = employees.Average(e => e.Age);
        _ = employees.Min(e => e.Age);
        _ = employees.Max(e => e.Age);
        _ = columnStore.Sum<int>("Age");
        _ = columnStore.Average("Age");
        _ = columnStore.Min<int>("Age");
        _ = columnStore.Max<int>("Age");

        // Test 1: SUM
        var linqSw = Stopwatch.StartNew();
        var linqSum = employees.Sum(e => e.Age);
        linqSw.Stop();

        var columnSw = Stopwatch.StartNew();
        var columnSum = columnStore.Sum<int>("Age");
        columnSw.Stop();

        var sumSpeedup = linqSw.Elapsed.TotalMilliseconds / columnSw.Elapsed.TotalMilliseconds;
        Console.WriteLine($"\n?? SUM(Age) on 10k records:");
        Console.WriteLine($" LINQ: {linqSw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Columnar: {columnSw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Speedup: {sumSpeedup:F2}x faster! ?");

        // Test 2: AVERAGE
        linqSw.Restart();
        var linqAvg = employees.Average(e => e.Age);
        linqSw.Stop();

        columnSw.Restart();
        var columnAvg = columnStore.Average("Age");
        columnSw.Stop();

        var avgSpeedup = linqSw.Elapsed.TotalMilliseconds / columnSw.Elapsed.TotalMilliseconds;
        Console.WriteLine($"\n?? AVG(Age) on 10k records:");
        Console.WriteLine($" LINQ: {linqSw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Columnar: {columnSw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Speedup: {avgSpeedup:F2}x faster! ?");

        // Test 3: MIN + MAX
        linqSw.Restart();
        var linqMin = employees.Min(e => e.Age);
        var linqMax = employees.Max(e => e.Age);
        linqSw.Stop();

        columnSw.Restart();
        var columnMin = columnStore.Min<int>("Age");
        var columnMax = columnStore.Max<int>("Age");
        columnSw.Stop();

        var minMaxSpeedup = linqSw.Elapsed.TotalMilliseconds / columnSw.Elapsed.TotalMilliseconds;
        Console.WriteLine($"\n?? MIN+MAX(Age) on 10k records:");
        Console.WriteLine($" LINQ: {linqSw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Columnar: {columnSw.Elapsed.TotalMilliseconds:F3}ms");
        Console.WriteLine($" Speedup: {minMaxSpeedup:F2}x faster! ?");

        Console.WriteLine($"\n??????????????????????????????????????????????????????????????");
        Console.WriteLine($"? SUMMARY ?");
        Console.WriteLine($"??????????????????????????????????????????????????????????????");
        Console.WriteLine($" Average Speedup: {(sumSpeedup + avgSpeedup + minMaxSpeedup) / 3:F2}x");
        Console.WriteLine($" Columnar storage is SIGNIFICANTLY faster! ??");

        // Assert: both implementations must agree, otherwise the timings compare different work
        Assert.Equal(linqSum, columnSum);
        Assert.Equal(linqAvg, columnAvg, precision: 2);
        Assert.Equal(linqMin, columnMin);
        Assert.Equal(linqMax, columnMax);

        // Assert: Columnar should be at least 2x faster
        Assert.True(sumSpeedup > 2.0, "Columnar SUM should be at least 2x faster");
        Assert.True(avgSpeedup > 2.0, "Columnar AVG should be at least 2x faster");
    }

    #endregion

    #region Helper Methods

    /// <summary>
    /// Builds <see cref="RecordCount"/> employee rows from a fixed random seed and a fixed
    /// reference date, so every call returns the same data.
    /// </summary>
    /// <returns>A new list of generated <see cref="EmployeeRecord"/> rows with Ids 1..N.</returns>
    private static List<EmployeeRecord> Generate10kEmployees()
    {
        var random = new Random(42); // Seed for reproducibility
        var departments = new[] { "Engineering", "Sales", "Marketing", "HR", "Finance" };
        var employees = new List<EmployeeRecord>(RecordCount);

        for (int i = 0; i < RecordCount; i++)
        {
            // The random draws happen in argument order (Age, Salary, Department, HireDate);
            // keep that order so the seeded sequence maps to the same fields.
            employees.Add(new EmployeeRecord(
                Id: i + 1,
                Name: $"Employee{i + 1}",
                Age: random.Next(22, 65),
                Salary: random.Next(50_000, 200_000),
                Department: departments[random.Next(departments.Length)],
                HireDate: ReferenceDate.AddDays(-random.Next(1, 3650))));
        }

        return employees;
    }

    /// <summary>
    /// Builds <see cref="RecordCount"/> sales rows from a fixed random seed and a fixed
    /// reference date. Not called by any test in this class.
    /// </summary>
    /// <returns>A new list of generated <see cref="SalesRecord"/> rows with Ids 1..N.</returns>
    private static List<SalesRecord> Generate10kSales()
    {
        var random = new Random(42);
        var products = new[] { "ProductA", "ProductB", "ProductC", "ProductD", "ProductE" };
        var regions = new[] { "North", "South", "East", "West" };
        var sales = new List<SalesRecord>(RecordCount);

        for (int i = 0; i < RecordCount; i++)
        {
            sales.Add(new SalesRecord(
                Id: i + 1,
                Product: products[random.Next(products.Length)],
                Price: random.Next(10, 1000),
                Quantity: random.Next(1, 100),
                OrderDate: ReferenceDate.AddDays(-random.Next(1, 365)),
                Region: regions[random.Next(regions.Length)]));
        }

        return sales;
    }

    #endregion
}