@@ -111,11 +111,8 @@ public static ColumnStats BuildStats(string columnName, int[] values)
111111 var distinctValues = new HashSet < int > ( nonNullValues ) ;
112112 var nullCount = values . Length - nonNullValues . Count ;
113113
114- var minValue = nonNullValues . Count > 0 ? nonNullValues . Min ( ) : ( int ? ) null ;
115- var maxValue = nonNullValues . Count > 0 ? nonNullValues . Max ( ) : ( int ? ) null ;
116-
117- // Build histogram with 10 buckets
118- var histogram = BuildHistogram ( nonNullValues , minValue , maxValue , bucketCount : 10 ) ;
114+ var minValue = nonNullValues . Count > 0 ? ( IComparable ? ) nonNullValues . Min ( ) : null ;
115+ var maxValue = nonNullValues . Count > 0 ? ( IComparable ? ) nonNullValues . Max ( ) : null ;
119116
120117 return new ColumnStats
121118 {
@@ -125,7 +122,7 @@ public static ColumnStats BuildStats(string columnName, int[] values)
125122 DistinctCount = distinctValues . Count ,
126123 MinValue = minValue ,
127124 MaxValue = maxValue ,
128- Histogram = histogram . ToArray ( ) ,
125+ Histogram = null , // Histogram support can be added in Phase 7.2
129126 } ;
130127 }
131128
@@ -150,10 +147,8 @@ public static ColumnStats BuildStats(string columnName, long[] values)
150147 var distinctValues = new HashSet < long > ( nonNullValues ) ;
151148 var nullCount = values . Length - nonNullValues . Count ;
152149
153- var minValue = nonNullValues . Count > 0 ? nonNullValues . Min ( ) : ( long ? ) null ;
154- var maxValue = nonNullValues . Count > 0 ? nonNullValues . Max ( ) : ( long ? ) null ;
155-
156- var histogram = BuildHistogram ( nonNullValues , minValue , maxValue , bucketCount : 10 ) ;
150+ var minValue = nonNullValues . Count > 0 ? ( IComparable ? ) nonNullValues . Min ( ) : null ;
151+ var maxValue = nonNullValues . Count > 0 ? ( IComparable ? ) nonNullValues . Max ( ) : null ;
157152
158153 return new ColumnStats
159154 {
@@ -163,7 +158,7 @@ public static ColumnStats BuildStats(string columnName, long[] values)
163158 DistinctCount = distinctValues . Count ,
164159 MinValue = minValue ,
165160 MaxValue = maxValue ,
166- Histogram = histogram . ToArray ( ) ,
161+ Histogram = null ,
167162 } ;
168163 }
169164
@@ -188,10 +183,8 @@ public static ColumnStats BuildStats(string columnName, double[] values)
188183 var distinctValues = new HashSet < double > ( nonNullValues ) ;
189184 var nullCount = values . Length - nonNullValues . Count ;
190185
191- var minValue = nonNullValues . Count > 0 ? nonNullValues . Min ( ) : ( double ? ) null ;
192- var maxValue = nonNullValues . Count > 0 ? nonNullValues . Max ( ) : ( double ? ) null ;
193-
194- var histogram = BuildHistogram ( nonNullValues , minValue , maxValue , bucketCount : 10 ) ;
186+ var minValue = nonNullValues . Count > 0 ? ( IComparable ? ) nonNullValues . Min ( ) : null ;
187+ var maxValue = nonNullValues . Count > 0 ? ( IComparable ? ) nonNullValues . Max ( ) : null ;
195188
196189 return new ColumnStats
197190 {
@@ -201,7 +194,7 @@ public static ColumnStats BuildStats(string columnName, double[] values)
201194 DistinctCount = distinctValues . Count ,
202195 MinValue = minValue ,
203196 MaxValue = maxValue ,
204- Histogram = histogram . ToArray ( ) ,
197+ Histogram = null ,
205198 } ;
206199 }
207200
@@ -267,78 +260,7 @@ public static double EstimateSelectivity(ColumnStats stats,
267260 if ( encoding == ColumnFormat . ColumnEncoding . Dictionary )
268261 return stats . DistinctSelectivity ;
269262
270- // Range predicates - estimate using histogram if available
271- if ( predicateOperator == ">" || predicateOperator == ">=" ||
272- predicateOperator == "<" || predicateOperator == "<=" )
273- {
274- if ( stats . Histogram != null && predicateValue is IComparable comparable )
275- {
276- return EstimateRangeSelectivity ( stats . Histogram , predicateOperator , comparable ) ;
277- }
278- }
279-
280263 // Default estimate: 10% selectivity (conservative)
281264 return 0.1 ;
282265 }
283-
284- /// <summary>Helper: Builds histogram from values.</summary>
285- private static List < HistogramBucket > BuildHistogram < T > (
286- List < T > values ,
287- T ? minValue ,
288- T ? maxValue ,
289- int bucketCount ) where T : IComparable
290- {
291- var result = new List < HistogramBucket > ( ) ;
292-
293- if ( values . Count == 0 || minValue == null || maxValue == null )
294- return result ;
295-
296- var sorted = values . OrderBy ( v => v ) . ToList ( ) ;
297- var bucketSize = ( values . Count + bucketCount - 1 ) / bucketCount ;
298-
299- for ( int i = 0 ; i < bucketCount && i * bucketSize < values . Count ; i ++ )
300- {
301- var lower = sorted [ i * bucketSize ] ;
302- var upper = i < bucketCount - 1
303- ? sorted [ Math . Min ( ( i + 1 ) * bucketSize , values . Count - 1 ) ]
304- : maxValue ;
305-
306- var count = Math . Min ( bucketSize , values . Count - ( i * bucketSize ) ) ;
307-
308- result . Add ( new HistogramBucket
309- {
310- BoundLower = lower ,
311- BoundUpper = upper ,
312- Count = count ,
313- } ) ;
314- }
315-
316- return result ;
317- }
318-
319- /// <summary>Helper: Estimates selectivity for range predicates.</summary>
320- private static double EstimateRangeSelectivity ( HistogramBucket [ ] histogram ,
321- string predicateOperator ,
322- IComparable value )
323- {
324- double selectivity = 0.0 ;
325-
326- foreach ( var bucket in histogram )
327- {
328- bool bucketMatches = predicateOperator switch
329- {
330- ">" => bucket . BoundUpper . CompareTo ( value ) > 0 ,
331- ">=" => bucket . BoundUpper . CompareTo ( value ) >= 0 ,
332- "<" => bucket . BoundLower . CompareTo ( value ) < 0 ,
333- "<=" => bucket . BoundLower . CompareTo ( value ) <= 0 ,
334- "=" => bucket . BoundLower . CompareTo ( value ) <= 0 && bucket . BoundUpper . CompareTo ( value ) > 0 ,
335- _ => false
336- } ;
337-
338- if ( bucketMatches )
339- selectivity += bucket . Fraction ( histogram . Sum ( b => b . Count ) ) ;
340- }
341-
342- return selectivity ;
343- }
344266}
0 commit comments