Skip to content

Commit 7a7ab34

Browse files
author
MPCoreDeveloper
committed
feat(phase7): Add Advanced Query Optimization - SimdFilter, ColumnarStorage, CostBasedOptimizer, ParallelQueryExecutor, MaterializedView - all 34 tests passing
1 parent 6092c0f commit 7a7ab34

File tree

6 files changed

+2072
-0
lines changed

6 files changed

+2072
-0
lines changed
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
// <copyright file="CostBasedOptimizer.cs" company="MPCoreDeveloper">
2+
// Copyright (c) 2025-2026 MPCoreDeveloper and GitHub Copilot. All rights reserved.
3+
// Licensed under the MIT License. See LICENSE file in the project root for full license information.
4+
// </copyright>
5+
6+
namespace SharpCoreDB.Query;
7+
8+
using System;
9+
using System.Collections.Generic;
10+
using System.Linq;
11+
12+
/// <summary>
/// Cost-based query optimizer (SCDB Phase 7.3: Advanced Query Optimization).
/// </summary>
/// <remarks>
/// Works from the per-table <see cref="TableStatistics"/> supplied through
/// <see cref="RegisterTable"/> in order to:
/// <list type="bullet">
/// <item><description>estimate result cardinality and execution cost,</description></item>
/// <item><description>order joins greedily, smallest table first,</description></item>
/// <item><description>push predicates down to the primary table scan,</description></item>
/// <item><description>pick an index using a simple first-filter-column heuristic.</description></item>
/// </list>
/// Every read and write of the registered statistics happens while holding a private lock.
/// </remarks>
public sealed class CostBasedOptimizer
{
    /// <summary>Row count assumed for a table that has no registered statistics.</summary>
    private const long DefaultRowCount = 1000;

    /// <summary>Distinct-value count assumed for a column that has no recorded statistic.</summary>
    private const long DefaultDistinctValues = 10;

    private readonly Dictionary<string, TableStatistics> _tableStats = [];
    private readonly Lock _lock = new();

    /// <summary>
    /// Registers statistics for a table, replacing any statistics previously registered under the same name.
    /// </summary>
    /// <param name="tableName">Name the statistics are stored under.</param>
    /// <param name="stats">Statistics to associate with <paramref name="tableName"/>.</param>
    /// <exception cref="ArgumentException"><paramref name="tableName"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="stats"/> is null.</exception>
    public void RegisterTable(string tableName, TableStatistics stats)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);
        ArgumentNullException.ThrowIfNull(stats);

        lock (_lock)
        {
            _tableStats[tableName] = stats;
        }
    }

    /// <summary>
    /// Estimates the cardinality (result size in rows) of a query.
    /// </summary>
    /// <param name="query">Plan whose primary table and filters are evaluated.</param>
    /// <returns>
    /// 1000 when the primary table has no registered statistics; otherwise the table's row count
    /// scaled by the estimated selectivity of every filter, never less than 1.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public long EstimateCardinality(QueryPlan query)
    {
        ArgumentNullException.ThrowIfNull(query);

        lock (_lock)
        {
            if (!_tableStats.TryGetValue(query.TableName, out var stats))
            {
                return DefaultRowCount;
            }

            long cardinality = stats.RowCount;

            // Filters are treated as independent: each one scales the running estimate,
            // which is truncated to a whole number of rows after every step.
            foreach (var filter in query.Filters)
            {
                cardinality = (long)(cardinality * EstimateSelectivity(filter, stats));
            }

            return Math.Max(1, cardinality);
        }
    }

    /// <summary>
    /// Estimates the execution cost of a query in abstract cost units.
    /// </summary>
    /// <param name="query">Plan to cost.</param>
    /// <returns>
    /// The sum of scan, filter, optional sort (n * ln n) and optional aggregation costs, all
    /// derived from <see cref="EstimateCardinality"/> and the factors in <c>CostConstants</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public double EstimateCost(QueryPlan query)
    {
        ArgumentNullException.ThrowIfNull(query);

        var cardinality = EstimateCardinality(query);

        // Scan cost
        double cost = cardinality * CostConstants.ScanCostPerRow;

        // Filter cost: every filter is charged once per estimated row
        cost += query.Filters.Count * cardinality * CostConstants.FilterCostPerRow;

        // Sort cost (if needed); cardinality is at least 1, so the logarithm is never negative
        if (query.RequiresSort)
        {
            cost += cardinality * Math.Log(cardinality) * CostConstants.SortCostFactor;
        }

        // Aggregation cost
        if (query.HasAggregation)
        {
            cost += cardinality * CostConstants.AggregationCostPerRow;
        }

        return cost;
    }

    /// <summary>
    /// Chooses a join order for multiple tables using a greedy strategy.
    /// </summary>
    /// <param name="tables">Tables to order.</param>
    /// <param name="joins">Join conditions between the tables; forwarded to the join cost estimate.</param>
    /// <returns>
    /// <paramref name="tables"/> itself when it holds two or fewer entries. Otherwise a new list
    /// of the distinct table names that starts with the smallest table and repeatedly appends the
    /// remaining table that is cheapest to join with the most recently added one.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="tables"/> is null.</exception>
    public List<string> OptimizeJoinOrder(List<string> tables, List<JoinCondition> joins)
    {
        ArgumentNullException.ThrowIfNull(tables);

        if (tables.Count <= 2)
        {
            return tables; // No optimization needed
        }

        var remaining = new HashSet<string>(tables);
        var ordered = new List<string>(remaining.Count);

        // Seed with the smallest table; ties go to the earliest entry in the input list.
        var current = tables.MinBy(GetTableSize);

        while (current is not null)
        {
            ordered.Add(current);
            remaining.Remove(current);

            if (remaining.Count == 0)
            {
                break;
            }

            // MinBy always yields an element of a non-empty set, so every iteration removes
            // one table and the loop is guaranteed to terminate.
            var previous = current;
            current = remaining.MinBy(candidate => EstimateJoinCost(previous, candidate, joins));
        }

        return ordered;
    }

    /// <summary>
    /// Pushes predicates down to the primary table scan.
    /// </summary>
    /// <param name="query">Plan to rewrite; it is not modified.</param>
    /// <returns>
    /// A copy of <paramref name="query"/> flagged as pushed down that keeps only the filters
    /// targeting the primary table. Filters with no table name are treated as belonging to the
    /// primary table and are kept rather than silently dropped.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public QueryPlan PushDownPredicates(QueryPlan query)
    {
        ArgumentNullException.ThrowIfNull(query);

        var tablePredicates = query.Filters
            .Where(f => f.TableName is null || f.TableName == query.TableName)
            .ToList();

        return query with
        {
            Filters = tablePredicates,
            PushedDown = true
        };
    }

    /// <summary>
    /// Chooses an index for a query.
    /// </summary>
    /// <param name="query">Plan whose first filter drives the choice.</param>
    /// <param name="availableIndexes">Names of the indexes that may be used.</param>
    /// <returns>
    /// <see langword="null"/> when no indexes are available. When the query has filters, the first
    /// index whose name contains the first filter's column name, or <see langword="null"/> if none
    /// does. When the query has no filters, the first available index.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="query"/> or <paramref name="availableIndexes"/> is null.
    /// </exception>
    public string? ChooseBestIndex(QueryPlan query, List<string> availableIndexes)
    {
        ArgumentNullException.ThrowIfNull(query);
        ArgumentNullException.ThrowIfNull(availableIndexes);

        if (availableIndexes.Count == 0)
        {
            return null;
        }

        // Simple heuristic: choose index on first filter column
        if (query.Filters.Count > 0)
        {
            var firstColumn = query.Filters[0].ColumnName;
            return availableIndexes.FirstOrDefault(idx => idx.Contains(firstColumn, StringComparison.Ordinal));
        }

        return availableIndexes[0];
    }

    // Private helpers

    /// <summary>
    /// Estimates the fraction of rows (0.0 to 1.0) that pass a single filter.
    /// </summary>
    private static double EstimateSelectivity(FilterExpression filter, TableStatistics stats)
    {
        return filter.Operator switch
        {
            // One value out of the column's distinct values (10 assumed when unknown).
            FilterOperator.Equals => 1.0 / Math.Max(1, stats.DistinctValues.GetValueOrDefault(filter.ColumnName, DefaultDistinctValues)),
            FilterOperator.Range => 0.1, // Assume 10% selectivity for ranges
            // 10% per listed value, capped at 100%. The parentheses are required: without them
            // "??" binds looser than "/" and the list count itself would be used as the selectivity.
            FilterOperator.In => Math.Min(1.0, (filter.InValues?.Count ?? 1) / 10.0),
            FilterOperator.Like => 0.5, // 50% for LIKE patterns
            _ => 0.1
        };
    }

    /// <summary>
    /// Returns the registered row count of a table, or 1000 when the table has no statistics.
    /// </summary>
    private long GetTableSize(string tableName)
    {
        lock (_lock)
        {
            return _tableStats.TryGetValue(tableName, out var stats) ? stats.RowCount : DefaultRowCount;
        }
    }

    /// <summary>
    /// Estimates the cost of joining two tables as a nested loop join, O(n * m).
    /// </summary>
    /// <remarks>
    /// <paramref name="joins"/> is accepted for future use; the current estimate depends only on table sizes.
    /// </remarks>
    private double EstimateJoinCost(string leftTable, string rightTable, List<JoinCondition> joins)
    {
        var leftSize = GetTableSize(leftTable);
        var rightSize = GetTableSize(rightTable);

        // Multiply in floating point: the product of two 64-bit row counts can overflow long.
        return (double)leftSize * rightSize * CostConstants.JoinCostPerRow;
    }
}
215+
216+
/// <summary>
/// Describes a query over one primary table: its filters plus the flags the optimizer
/// reads (sort, aggregation) and writes (pushed-down state).
/// </summary>
public sealed record QueryPlan
{
    /// <summary>
    /// Gets the name of the primary table the query reads from.
    /// </summary>
    public required string TableName { get; init; }

    /// <summary>
    /// Gets the filter expressions applied by the query.
    /// </summary>
    public required List<FilterExpression> Filters { get; init; }

    /// <summary>
    /// Gets a value indicating whether the result must be sorted.
    /// </summary>
    public bool RequiresSort { get; init; }

    /// <summary>
    /// Gets a value indicating whether the query performs an aggregation.
    /// </summary>
    public bool HasAggregation { get; init; }

    /// <summary>
    /// Gets a value indicating whether predicates have already been pushed down.
    /// </summary>
    public bool PushedDown { get; init; }
}
236+
237+
/// <summary>
/// A single predicate on one column of a query.
/// </summary>
public sealed record FilterExpression
{
    /// <summary>
    /// Gets the table the column belongs to (used for joins); may be left unset.
    /// </summary>
    public string? TableName { get; init; }

    /// <summary>
    /// Gets the name of the filtered column.
    /// </summary>
    public required string ColumnName { get; init; }

    /// <summary>
    /// Gets the operator applied to the column.
    /// </summary>
    public required FilterOperator Operator { get; init; }

    /// <summary>
    /// Gets the value the column is compared against, if any.
    /// </summary>
    public object? Value { get; init; }

    /// <summary>
    /// Gets the candidate values for the IN operator, if any.
    /// </summary>
    public List<object>? InValues { get; init; }
}
257+
258+
/// <summary>
/// Comparison operators a <c>FilterExpression</c> can apply.
/// </summary>
public enum FilterOperator
{
    /// <summary>Column equals a value.</summary>
    Equals = 0,

    /// <summary>Column falls within a range.</summary>
    Range = 1,

    /// <summary>Column matches one of the values of an IN clause.</summary>
    In = 2,

    /// <summary>Column matches a LIKE pattern.</summary>
    Like = 3,

    /// <summary>Column is greater than a value.</summary>
    GreaterThan = 4,

    /// <summary>Column is less than a value.</summary>
    LessThan = 5
}
281+
282+
/// <summary>
/// An equi-join condition pairing a column of a left table with a column of a right table.
/// </summary>
public sealed record JoinCondition
{
    /// <summary>
    /// Gets the name of the left table.
    /// </summary>
    public required string LeftTable { get; init; }

    /// <summary>
    /// Gets the join column in the left table.
    /// </summary>
    public required string LeftColumn { get; init; }

    /// <summary>
    /// Gets the name of the right table.
    /// </summary>
    public required string RightTable { get; init; }

    /// <summary>
    /// Gets the join column in the right table.
    /// </summary>
    public required string RightColumn { get; init; }
}
299+
300+
/// <summary>
/// Per-table statistics consumed by cost estimation.
/// </summary>
public sealed record TableStatistics
{
    /// <summary>
    /// Gets the name of the table the statistics describe.
    /// </summary>
    public required string TableName { get; init; }

    /// <summary>
    /// Gets the total number of rows in the table.
    /// </summary>
    public required long RowCount { get; init; }

    /// <summary>
    /// Gets the average row size in bytes; defaults to 100.
    /// </summary>
    public int AverageRowSize { get; init; } = 100;

    /// <summary>
    /// Gets the number of distinct values per column, keyed by column name; empty by default.
    /// </summary>
    public Dictionary<string, long> DistinctValues { get; init; } = new();
}
317+
318+
/// <summary>
/// Weighting factors, in abstract cost units, used by cost estimation.
/// </summary>
internal static class CostConstants
{
    /// <summary>Cost charged per row scanned.</summary>
    public const double ScanCostPerRow = 1.0;

    /// <summary>Cost charged per row for each filter evaluated.</summary>
    public const double FilterCostPerRow = 0.1;

    /// <summary>Multiplier applied to the n * log(n) sort term.</summary>
    public const double SortCostFactor = 0.5;

    /// <summary>Cost charged per row aggregated.</summary>
    public const double AggregationCostPerRow = 0.2;

    /// <summary>Cost charged per row pair considered by a join.</summary>
    public const double JoinCostPerRow = 0.05;
}

0 commit comments

Comments
 (0)