|
| 1 | +// <copyright file="CostBasedOptimizer.cs" company="MPCoreDeveloper"> |
| 2 | +// Copyright (c) 2025-2026 MPCoreDeveloper and GitHub Copilot. All rights reserved. |
| 3 | +// Licensed under the MIT License. See LICENSE file in the project root for full license information. |
| 4 | +// </copyright> |
| 5 | + |
| 6 | +namespace SharpCoreDB.Query; |
| 7 | + |
| 8 | +using System; |
| 9 | +using System.Collections.Generic; |
| 10 | +using System.Linq; |
| 11 | + |
/// <summary>
/// Cost-based query optimizer (SCDB Phase 7.3: Advanced Query Optimization).
///
/// Responsibilities:
/// - Estimate result cardinality and execution cost from registered table statistics
/// - Order joins greedily, smallest table first
/// - Push predicates down to the primary table scan
/// - Pick an index for a query
///
/// Access to the registered statistics is serialized through a private lock.
/// </summary>
public sealed class CostBasedOptimizer
{
    /// <summary>Row count assumed for a table that has no registered statistics.</summary>
    private const long DefaultRowEstimate = 1000;

    /// <summary>Distinct-value count assumed for a column that has no recorded statistic.</summary>
    private const long DefaultDistinctValues = 10;

    /// <summary>Selectivity assumed for range filters and for operators without a dedicated rule.</summary>
    private const double DefaultSelectivity = 0.1;

    /// <summary>Selectivity assumed for LIKE patterns.</summary>
    private const double LikeSelectivity = 0.5;

    /// <summary>Divisor applied to the number of IN values to obtain a selectivity.</summary>
    private const double InListDivisor = 10.0;

    private readonly Dictionary<string, TableStatistics> _tableStats = [];
    private readonly Lock _lock = new();

    /// <summary>
    /// Registers (or replaces) the statistics used for <paramref name="tableName"/>.
    /// </summary>
    /// <param name="tableName">Name the statistics are stored under.</param>
    /// <param name="stats">Statistics for the table.</param>
    /// <exception cref="ArgumentException"><paramref name="tableName"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public void RegisterTable(string tableName, TableStatistics stats)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);
        ArgumentNullException.ThrowIfNull(stats);

        lock (_lock)
        {
            _tableStats[tableName] = stats;
        }
    }

    /// <summary>
    /// Estimates cardinality (result size) for a query.
    /// </summary>
    /// <param name="query">Plan whose table and filters are evaluated.</param>
    /// <returns>
    /// The table's row count scaled by the estimated selectivity of every filter, never less
    /// than 1; or a fixed default of 1000 when the table has no registered statistics.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public long EstimateCardinality(QueryPlan query)
    {
        ArgumentNullException.ThrowIfNull(query);

        lock (_lock)
        {
            if (!_tableStats.TryGetValue(query.TableName, out var stats))
            {
                return DefaultRowEstimate;
            }

            long cardinality = stats.RowCount;

            // Each filter narrows the running estimate; the result is truncated to a whole
            // number of rows after every step.
            foreach (var filter in query.Filters)
            {
                cardinality = (long)(cardinality * EstimateSelectivity(filter, stats));
            }

            return Math.Max(1, cardinality);
        }
    }

    /// <summary>
    /// Estimates execution cost for a query.
    /// </summary>
    /// <param name="query">Plan to cost.</param>
    /// <returns>
    /// The sum of scan cost, per-filter cost, an n·ln(n) sort term when
    /// <see cref="QueryPlan.RequiresSort"/> is set, and an aggregation term when
    /// <see cref="QueryPlan.HasAggregation"/> is set. All terms are driven by the
    /// estimated cardinality.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public double EstimateCost(QueryPlan query)
    {
        ArgumentNullException.ThrowIfNull(query);

        double cost = 0;

        // Scan cost
        var cardinality = EstimateCardinality(query);
        cost += cardinality * CostConstants.ScanCostPerRow;

        // Filter cost
        cost += query.Filters.Count * cardinality * CostConstants.FilterCostPerRow;

        // Sort cost (if needed). Cardinality is at least 1, so the logarithm is never negative.
        if (query.RequiresSort)
        {
            cost += cardinality * Math.Log(cardinality) * CostConstants.SortCostFactor;
        }

        // Aggregation cost
        if (query.HasAggregation)
        {
            cost += cardinality * CostConstants.AggregationCostPerRow;
        }

        return cost;
    }

    /// <summary>
    /// Optimizes join order for multiple tables using a greedy strategy.
    /// </summary>
    /// <param name="tables">Tables to order.</param>
    /// <param name="joins">Join conditions; the current cost model does not consult them.</param>
    /// <returns>
    /// <paramref name="tables"/> itself when it holds two or fewer entries; otherwise a new list
    /// of the distinct table names, starting with the smallest table and repeatedly appending the
    /// remaining table that is cheapest to join with the most recently placed one.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="tables"/> is null.</exception>
    public List<string> OptimizeJoinOrder(List<string> tables, List<JoinCondition> joins)
    {
        ArgumentNullException.ThrowIfNull(tables);

        if (tables.Count <= 2)
        {
            return tables; // No optimization needed
        }

        var remaining = new HashSet<string>(tables);
        var ordered = new List<string>(remaining.Count);

        // Seed the order with the smallest table (the first one listed wins ties).
        var smallest = tables.OrderBy(GetTableSize).First();
        ordered.Add(smallest);
        remaining.Remove(smallest);

        // Repeatedly append the cheapest remaining table. MinBy returns the first minimum,
        // and always yields an element for a non-empty set, so the loop is guaranteed to
        // shrink 'remaining' on every pass.
        while (remaining.Count > 0)
        {
            var previous = ordered[^1];
            var next = remaining.MinBy(candidate => EstimateJoinCost(previous, candidate, joins))!;

            ordered.Add(next);
            remaining.Remove(next);
        }

        return ordered;
    }

    /// <summary>
    /// Pushes predicates down to the primary table scan.
    /// </summary>
    /// <param name="query">Plan whose filters are narrowed.</param>
    /// <returns>
    /// A copy of <paramref name="query"/> that keeps only the filters whose
    /// <see cref="FilterExpression.TableName"/> equals <see cref="QueryPlan.TableName"/>
    /// and has <see cref="QueryPlan.PushedDown"/> set.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
    public QueryPlan PushDownPredicates(QueryPlan query)
    {
        ArgumentNullException.ThrowIfNull(query);

        // NOTE(review): filters with a null TableName, or one naming another table, are not
        // carried into the returned plan. Confirm that dropping unqualified filters is intended.
        var tablePredicates = query.Filters
            .Where(f => f.TableName == query.TableName)
            .ToList();

        return query with
        {
            Filters = tablePredicates,
            PushedDown = true
        };
    }

    /// <summary>
    /// Chooses the best index for a query.
    /// </summary>
    /// <param name="query">Plan whose first filter drives the choice.</param>
    /// <param name="availableIndexes">Candidate index names.</param>
    /// <returns>
    /// Null when no indexes are available; otherwise the first index whose name contains the
    /// first filter's column name (null if none does), or the first available index when the
    /// query has no filters.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public string? ChooseBestIndex(QueryPlan query, List<string> availableIndexes)
    {
        ArgumentNullException.ThrowIfNull(query);
        ArgumentNullException.ThrowIfNull(availableIndexes);

        if (availableIndexes.Count == 0)
        {
            return null;
        }

        // Simple heuristic: choose index on first filter column
        if (query.Filters.Count > 0)
        {
            var firstColumn = query.Filters[0].ColumnName;
            return availableIndexes.FirstOrDefault(idx => idx.Contains(firstColumn, StringComparison.Ordinal));
        }

        return availableIndexes[0];
    }

    // Private helpers

    /// <summary>
    /// Estimates the fraction of rows that pass <paramref name="filter"/>.
    /// </summary>
    private static double EstimateSelectivity(FilterExpression filter, TableStatistics stats)
    {
        return filter.Operator switch
        {
            // One value out of the column's distinct values (assumes 10 when unknown).
            FilterOperator.Equals => 1.0 / Math.Max(1, stats.DistinctValues.GetValueOrDefault(filter.ColumnName, DefaultDistinctValues)),
            FilterOperator.Range => DefaultSelectivity, // Assume 10% selectivity for ranges

            // Parenthesized on purpose: '/' binds tighter than '??', so without the parentheses
            // the raw value count was used as the selectivity and clamped to 1.0.
            FilterOperator.In => Math.Min(1.0, (filter.InValues?.Count ?? 1) / InListDivisor),
            FilterOperator.Like => LikeSelectivity, // 50% for LIKE patterns
            _ => DefaultSelectivity
        };
    }

    /// <summary>
    /// Returns the registered row count for <paramref name="tableName"/>, or 1000 when unknown.
    /// </summary>
    private long GetTableSize(string tableName)
    {
        lock (_lock)
        {
            return _tableStats.TryGetValue(tableName, out var stats) ? stats.RowCount : DefaultRowEstimate;
        }
    }

    /// <summary>
    /// Estimates the cost of joining two tables as a nested loop: O(n * m).
    /// <paramref name="joins"/> is accepted but not used by the current model.
    /// </summary>
    private double EstimateJoinCost(string leftTable, string rightTable, List<JoinCondition> joins)
    {
        var leftSize = GetTableSize(leftTable);
        var rightSize = GetTableSize(rightTable);

        // Promote to double before multiplying: the product of two large row counts
        // overflows long and would silently wrap around.
        return (double)leftSize * rightSize * CostConstants.JoinCostPerRow;
    }
}
| 215 | + |
/// <summary>
/// Immutable description of a query as seen by the optimizer: the primary table,
/// the filters applied to it, and flags for sorting, aggregation and predicate push-down.
/// </summary>
public sealed record QueryPlan
{
    /// <summary>Gets the name of the primary table.</summary>
    public required string TableName { get; init; }

    /// <summary>Gets the filter expressions attached to the plan.</summary>
    public required List<FilterExpression> Filters { get; init; }

    /// <summary>Gets a value indicating whether the result has to be sorted.</summary>
    public bool RequiresSort { get; init; }

    /// <summary>Gets a value indicating whether the query aggregates its rows.</summary>
    public bool HasAggregation { get; init; }

    /// <summary>Gets a value indicating whether predicates have already been pushed down.</summary>
    public bool PushedDown { get; init; }
}
| 236 | + |
/// <summary>
/// A single predicate on one column: the operator plus the value(s) it compares against.
/// </summary>
public sealed record FilterExpression
{
    /// <summary>Gets the table the column belongs to (used with joins); may be null.</summary>
    public string? TableName { get; init; }

    /// <summary>Gets the name of the filtered column.</summary>
    public required string ColumnName { get; init; }

    /// <summary>Gets the operator applied to the column.</summary>
    public required FilterOperator Operator { get; init; }

    /// <summary>Gets the value the column is compared with; may be null.</summary>
    public object? Value { get; init; }

    /// <summary>Gets the candidate values for an IN filter; may be null.</summary>
    public List<object>? InValues { get; init; }
}
| 257 | + |
/// <summary>
/// Comparison operators a <c>FilterExpression</c> can carry.
/// The numeric values are spelled out and match the declaration order.
/// </summary>
public enum FilterOperator
{
    /// <summary>Column equals a value.</summary>
    Equals = 0,

    /// <summary>Column falls within a range.</summary>
    Range = 1,

    /// <summary>Column matches one of a list of values (IN clause).</summary>
    In = 2,

    /// <summary>Column matches a LIKE pattern.</summary>
    Like = 3,

    /// <summary>Column is greater than a value.</summary>
    GreaterThan = 4,

    /// <summary>Column is less than a value.</summary>
    LessThan = 5
}
| 281 | + |
/// <summary>
/// Pairs a column of one table with a column of another to describe a join.
/// </summary>
public sealed record JoinCondition
{
    /// <summary>Gets the table on the left side of the join.</summary>
    public required string LeftTable { get; init; }

    /// <summary>Gets the joined column of the left table.</summary>
    public required string LeftColumn { get; init; }

    /// <summary>Gets the table on the right side of the join.</summary>
    public required string RightTable { get; init; }

    /// <summary>Gets the joined column of the right table.</summary>
    public required string RightColumn { get; init; }
}
| 299 | + |
/// <summary>
/// Per-table figures the optimizer reads when estimating cardinality and cost.
/// </summary>
public sealed record TableStatistics
{
    /// <summary>Gets the name of the table the statistics describe.</summary>
    public required string TableName { get; init; }

    /// <summary>Gets the total number of rows in the table.</summary>
    public required long RowCount { get; init; }

    /// <summary>Gets the average row size in bytes; 100 unless set.</summary>
    public int AverageRowSize { get; init; } = 100;

    /// <summary>Gets the number of distinct values per column name; empty unless set.</summary>
    public Dictionary<string, long> DistinctValues { get; init; } = new();
}
| 317 | + |
/// <summary>
/// Weights used by the optimizer's cost formulas.
/// </summary>
internal static class CostConstants
{
    /// <summary>Cost charged per scanned row.</summary>
    public const double ScanCostPerRow = 1.0;

    /// <summary>Cost charged per row for each filter.</summary>
    public const double FilterCostPerRow = 0.1;

    /// <summary>Multiplier applied to the n·log(n) sort term.</summary>
    public const double SortCostFactor = 0.5;

    /// <summary>Cost charged per aggregated row.</summary>
    public const double AggregationCostPerRow = 0.2;

    /// <summary>Cost charged per row pair in a join.</summary>
    public const double JoinCostPerRow = 0.05;
}
0 commit comments