Skip to content

Commit c74d6c6

Browse files
authored
Add STATISTICS support for column declarations and ALTER TABLE commands (#105)
1 parent 042e803 commit c74d6c6

File tree

5 files changed

+261
-70
lines changed

5 files changed

+261
-70
lines changed

ast/ast.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ type ColumnDeclaration struct {
291291
Default Expression `json:"default,omitempty"`
292292
DefaultKind string `json:"default_kind,omitempty"` // DEFAULT, MATERIALIZED, ALIAS, EPHEMERAL
293293
Codec *CodecExpr `json:"codec,omitempty"`
294+
Statistics []*FunctionCall `json:"statistics,omitempty"` // STATISTICS clause
294295
TTL Expression `json:"ttl,omitempty"`
295296
PrimaryKey bool `json:"primary_key,omitempty"` // PRIMARY KEY constraint
296297
Comment string `json:"comment,omitempty"`
@@ -522,6 +523,8 @@ type AlterCommand struct {
522523
Assignments []*Assignment `json:"assignments,omitempty"` // For UPDATE
523524
Projection *Projection `json:"projection,omitempty"` // For ADD PROJECTION
524525
ProjectionName string `json:"projection_name,omitempty"` // For DROP/MATERIALIZE/CLEAR PROJECTION
526+
StatisticsColumns []string `json:"statistics_columns,omitempty"` // For ADD/DROP/CLEAR/MATERIALIZE STATISTICS
527+
StatisticsTypes []*FunctionCall `json:"statistics_types,omitempty"` // For ADD/MODIFY STATISTICS TYPE
525528
}
526529

527530
// Projection represents a projection definition.
@@ -585,6 +588,11 @@ const (
585588
AlterDropProjection AlterCommandType = "DROP_PROJECTION"
586589
AlterMaterializeProjection AlterCommandType = "MATERIALIZE_PROJECTION"
587590
AlterClearProjection AlterCommandType = "CLEAR_PROJECTION"
591+
AlterAddStatistics AlterCommandType = "ADD_STATISTICS"
592+
AlterModifyStatistics AlterCommandType = "MODIFY_STATISTICS"
593+
AlterDropStatistics AlterCommandType = "DROP_STATISTICS"
594+
AlterClearStatistics AlterCommandType = "CLEAR_STATISTICS"
595+
AlterMaterializeStatistics AlterCommandType = "MATERIALIZE_STATISTICS"
588596
)
589597

590598
// TruncateQuery represents a TRUNCATE statement.

internal/explain/explain.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
234234
if col.Type != nil {
235235
children++
236236
}
237+
if len(col.Statistics) > 0 {
238+
children++
239+
}
237240
// EPHEMERAL columns without explicit default get defaultValueOfTypeName
238241
hasEphemeralDefault := col.DefaultKind == "EPHEMERAL" && col.Default == nil
239242
if col.Default != nil || hasEphemeralDefault {
@@ -246,6 +249,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
246249
if col.Type != nil {
247250
Node(sb, col.Type, depth+1)
248251
}
252+
if len(col.Statistics) > 0 {
253+
explainStatisticsExpr(sb, col.Statistics, indent+" ", depth+1)
254+
}
249255
if col.Default != nil {
250256
Node(sb, col.Default, depth+1)
251257
} else if hasEphemeralDefault {
@@ -282,6 +288,31 @@ func explainCodecFunction(sb *strings.Builder, fn *ast.FunctionCall, indent stri
282288
}
283289
}
284290

291+
// explainStatisticsExpr handles STATISTICS expressions in column declarations
292+
func explainStatisticsExpr(sb *strings.Builder, stats []*ast.FunctionCall, indent string, depth int) {
293+
// STATISTICS is rendered as a Function with one child (ExpressionList of statistics types)
294+
fmt.Fprintf(sb, "%sFunction STATISTICS (children 1)\n", indent)
295+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(stats))
296+
for _, s := range stats {
297+
explainStatisticsFunction(sb, s, indent+" ", depth+2)
298+
}
299+
}
300+
301+
// explainStatisticsFunction handles individual statistics functions (e.g., tdigest, uniq, countmin)
302+
func explainStatisticsFunction(sb *strings.Builder, fn *ast.FunctionCall, indent string, depth int) {
303+
if len(fn.Arguments) == 0 {
304+
// Statistics type without parameters: just the function name
305+
fmt.Fprintf(sb, "%sFunction %s\n", indent, fn.Name)
306+
} else {
307+
// Statistics type with parameters: function with ExpressionList of arguments
308+
fmt.Fprintf(sb, "%sFunction %s (children 1)\n", indent, fn.Name)
309+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(fn.Arguments))
310+
for _, arg := range fn.Arguments {
311+
Node(sb, arg, depth+2)
312+
}
313+
}
314+
}
315+
285316
func Index(sb *strings.Builder, idx *ast.IndexDefinition, depth int) {
286317
indent := strings.Repeat(" ", depth)
287318
children := 0

internal/explain/statements.go

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,12 @@ func explainAlterQuery(sb *strings.Builder, n *ast.AlterQuery, indent string, de
829829

830830
func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent string, depth int) {
831831
children := countAlterCommandChildren(cmd)
832-
fmt.Fprintf(sb, "%sAlterCommand %s (children %d)\n", indent, cmd.Type, children)
832+
// CLEAR_STATISTICS is normalized to DROP_STATISTICS in EXPLAIN AST output
833+
cmdType := cmd.Type
834+
if cmdType == ast.AlterClearStatistics {
835+
cmdType = ast.AlterDropStatistics
836+
}
837+
fmt.Fprintf(sb, "%sAlterCommand %s (children %d)\n", indent, cmdType, children)
833838

834839
switch cmd.Type {
835840
case ast.AlterAddColumn:
@@ -917,6 +922,10 @@ func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent stri
917922
if cmd.ProjectionName != "" {
918923
fmt.Fprintf(sb, "%s Identifier %s\n", indent, cmd.ProjectionName)
919924
}
925+
case ast.AlterAddStatistics, ast.AlterModifyStatistics:
926+
explainStatisticsCommand(sb, cmd, indent, depth)
927+
case ast.AlterDropStatistics, ast.AlterClearStatistics, ast.AlterMaterializeStatistics:
928+
explainStatisticsCommand(sb, cmd, indent, depth)
920929
default:
921930
if cmd.Partition != nil {
922931
Node(sb, cmd.Partition, depth+1)
@@ -964,6 +973,49 @@ func explainProjectionSelectQuery(sb *strings.Builder, q *ast.ProjectionSelectQu
964973
}
965974
}
966975

976+
func explainStatisticsCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent string, depth int) {
977+
// Stat node has 1 child (columns only) or 2 children (columns + types)
978+
statChildren := 0
979+
if len(cmd.StatisticsColumns) > 0 {
980+
statChildren++
981+
}
982+
if len(cmd.StatisticsTypes) > 0 {
983+
statChildren++
984+
}
985+
986+
fmt.Fprintf(sb, "%s Stat (children %d)\n", indent, statChildren)
987+
988+
// First: column names as ExpressionList of Identifiers
989+
if len(cmd.StatisticsColumns) > 0 {
990+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(cmd.StatisticsColumns))
991+
for _, col := range cmd.StatisticsColumns {
992+
fmt.Fprintf(sb, "%s Identifier %s\n", indent, col)
993+
}
994+
}
995+
996+
// Second: statistics types as ExpressionList of Functions
997+
if len(cmd.StatisticsTypes) > 0 {
998+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(cmd.StatisticsTypes))
999+
for _, t := range cmd.StatisticsTypes {
1000+
explainStatisticsTypeFunction(sb, t, indent+" ", depth+3)
1001+
}
1002+
}
1003+
}
1004+
1005+
func explainStatisticsTypeFunction(sb *strings.Builder, fn *ast.FunctionCall, indent string, depth int) {
1006+
// Statistics type functions always have (children 1) even if no actual arguments
1007+
// because ClickHouse shows them with an empty ExpressionList
1008+
fmt.Fprintf(sb, "%sFunction %s (children 1)\n", indent, fn.Name)
1009+
if len(fn.Arguments) == 0 {
1010+
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
1011+
} else {
1012+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(fn.Arguments))
1013+
for _, arg := range fn.Arguments {
1014+
Node(sb, arg, depth+1)
1015+
}
1016+
}
1017+
}
1018+
9671019
func countAlterCommandChildren(cmd *ast.AlterCommand) int {
9681020
children := 0
9691021
switch cmd.Type {
@@ -1036,6 +1088,16 @@ func countAlterCommandChildren(cmd *ast.AlterCommand) int {
10361088
if cmd.ProjectionName != "" {
10371089
children++
10381090
}
1091+
case ast.AlterAddStatistics, ast.AlterModifyStatistics:
1092+
// Statistics commands with TYPE have one child (Stat node)
1093+
if len(cmd.StatisticsColumns) > 0 || len(cmd.StatisticsTypes) > 0 {
1094+
children = 1
1095+
}
1096+
case ast.AlterDropStatistics, ast.AlterClearStatistics, ast.AlterMaterializeStatistics:
1097+
// Statistics commands without TYPE have one child (Stat node with just columns)
1098+
if len(cmd.StatisticsColumns) > 0 {
1099+
children = 1
1100+
}
10391101
default:
10401102
if cmd.Partition != nil {
10411103
children++

parser/parser.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2509,6 +2509,12 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration {
25092509
col.Type = p.parseDataType()
25102510
}
25112511

2512+
// Parse STATISTICS clause (e.g., STATISTICS(tdigest, uniq))
2513+
if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" {
2514+
p.nextToken()
2515+
col.Statistics = p.parseStatisticsExpr()
2516+
}
2517+
25122518
// Handle COLLATE clause (MySQL compatibility, e.g., varchar(255) COLLATE binary)
25132519
if p.currentIs(token.COLLATE) {
25142520
p.nextToken()
@@ -2759,6 +2765,100 @@ func (p *Parser) parseCodecExpr() *ast.CodecExpr {
27592765
return codec
27602766
}
27612767

2768+
func (p *Parser) parseStatisticsExpr() []*ast.FunctionCall {
2769+
var stats []*ast.FunctionCall
2770+
2771+
if !p.expect(token.LPAREN) {
2772+
return nil
2773+
}
2774+
2775+
for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) {
2776+
if p.currentIs(token.IDENT) {
2777+
name := p.current.Value
2778+
pos := p.current.Pos
2779+
p.nextToken()
2780+
2781+
fn := &ast.FunctionCall{
2782+
Position: pos,
2783+
Name: name,
2784+
}
2785+
2786+
// Statistics types can have optional parameters: e.g., tdigest(100)
2787+
if p.currentIs(token.LPAREN) {
2788+
p.nextToken()
2789+
if !p.currentIs(token.RPAREN) {
2790+
fn.Arguments = p.parseExpressionList()
2791+
}
2792+
p.expect(token.RPAREN)
2793+
}
2794+
2795+
stats = append(stats, fn)
2796+
}
2797+
2798+
if p.currentIs(token.COMMA) {
2799+
p.nextToken()
2800+
} else {
2801+
break
2802+
}
2803+
}
2804+
2805+
p.expect(token.RPAREN)
2806+
return stats
2807+
}
2808+
2809+
// parseStatisticsColumnList parses comma-separated column names for ALTER STATISTICS commands
2810+
func (p *Parser) parseStatisticsColumnList() []string {
2811+
var columns []string
2812+
2813+
for p.currentIs(token.IDENT) || p.current.Token.IsKeyword() {
2814+
columns = append(columns, p.current.Value)
2815+
p.nextToken()
2816+
2817+
if p.currentIs(token.COMMA) {
2818+
p.nextToken()
2819+
} else {
2820+
break
2821+
}
2822+
}
2823+
2824+
return columns
2825+
}
2826+
2827+
// parseStatisticsTypeList parses comma-separated statistics type names for ALTER STATISTICS TYPE clause
2828+
func (p *Parser) parseStatisticsTypeList() []*ast.FunctionCall {
2829+
var types []*ast.FunctionCall
2830+
2831+
for p.currentIs(token.IDENT) {
2832+
name := p.current.Value
2833+
pos := p.current.Pos
2834+
p.nextToken()
2835+
2836+
fn := &ast.FunctionCall{
2837+
Position: pos,
2838+
Name: name,
2839+
}
2840+
2841+
// Statistics types can have optional parameters
2842+
if p.currentIs(token.LPAREN) {
2843+
p.nextToken()
2844+
if !p.currentIs(token.RPAREN) {
2845+
fn.Arguments = p.parseExpressionList()
2846+
}
2847+
p.expect(token.RPAREN)
2848+
}
2849+
2850+
types = append(types, fn)
2851+
2852+
if p.currentIs(token.COMMA) {
2853+
p.nextToken()
2854+
} else {
2855+
break
2856+
}
2857+
}
2858+
2859+
return types
2860+
}
2861+
27622862
func (p *Parser) parseEngineClause() *ast.EngineClause {
27632863
engine := &ast.EngineClause{
27642864
Position: p.current.Pos,
@@ -3188,6 +3288,27 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand {
31883288
cmd.Type = ast.AlterAddProjection
31893289
p.nextToken()
31903290
cmd.Projection = p.parseProjection()
3291+
} else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" {
3292+
cmd.Type = ast.AlterAddStatistics
3293+
p.nextToken()
3294+
// Handle IF NOT EXISTS
3295+
if p.currentIs(token.IF) {
3296+
p.nextToken()
3297+
if p.currentIs(token.NOT) {
3298+
p.nextToken()
3299+
if p.currentIs(token.EXISTS) {
3300+
cmd.IfNotExists = true
3301+
p.nextToken()
3302+
}
3303+
}
3304+
}
3305+
// Parse column list (comma-separated identifiers)
3306+
cmd.StatisticsColumns = p.parseStatisticsColumnList()
3307+
// Parse TYPE clause
3308+
if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TYPE" {
3309+
p.nextToken()
3310+
cmd.StatisticsTypes = p.parseStatisticsTypeList()
3311+
}
31913312
}
31923313
case token.DROP:
31933314
p.nextToken()
@@ -3233,6 +3354,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand {
32333354
cmd.ProjectionName = p.current.Value
32343355
p.nextToken()
32353356
}
3357+
} else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" {
3358+
cmd.Type = ast.AlterDropStatistics
3359+
p.nextToken()
3360+
if p.currentIs(token.IF) {
3361+
p.nextToken()
3362+
p.expect(token.EXISTS)
3363+
cmd.IfExists = true
3364+
}
3365+
cmd.StatisticsColumns = p.parseStatisticsColumnList()
32363366
}
32373367
case token.IDENT:
32383368
// Handle CLEAR, MATERIALIZE
@@ -3260,6 +3390,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand {
32603390
cmd.ProjectionName = p.current.Value
32613391
p.nextToken()
32623392
}
3393+
} else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" {
3394+
cmd.Type = ast.AlterClearStatistics
3395+
p.nextToken()
3396+
if p.currentIs(token.IF) {
3397+
p.nextToken()
3398+
p.expect(token.EXISTS)
3399+
cmd.IfExists = true
3400+
}
3401+
cmd.StatisticsColumns = p.parseStatisticsColumnList()
32633402
}
32643403
} else if upper == "MATERIALIZE" {
32653404
p.nextToken()
@@ -3277,6 +3416,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand {
32773416
cmd.ProjectionName = p.current.Value
32783417
p.nextToken()
32793418
}
3419+
} else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" {
3420+
cmd.Type = ast.AlterMaterializeStatistics
3421+
p.nextToken()
3422+
if p.currentIs(token.IF) {
3423+
p.nextToken()
3424+
p.expect(token.EXISTS)
3425+
cmd.IfExists = true
3426+
}
3427+
cmd.StatisticsColumns = p.parseStatisticsColumnList()
32803428
}
32813429
} else {
32823430
return nil
@@ -3299,6 +3447,16 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand {
32993447
cmd.Type = ast.AlterModifySetting
33003448
p.nextToken()
33013449
cmd.Settings = p.parseSettingsList()
3450+
} else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" {
3451+
cmd.Type = ast.AlterModifyStatistics
3452+
p.nextToken()
3453+
// Parse column list (comma-separated identifiers)
3454+
cmd.StatisticsColumns = p.parseStatisticsColumnList()
3455+
// Parse TYPE clause
3456+
if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TYPE" {
3457+
p.nextToken()
3458+
cmd.StatisticsTypes = p.parseStatisticsTypeList()
3459+
}
33023460
}
33033461
case token.RENAME:
33043462
p.nextToken()

0 commit comments

Comments
 (0)