Skip to content

Commit ceb3eca

Browse files
Ajit Pratap Singh, claude
authored and committed
feat(parser): add ClickHouse SAMPLE clause support (#454)
Parse SAMPLE <ratio|count|N/D> [OFFSET N/D] after FROM in ClickHouse dialect. Adds SampleClause AST node and sampleSQL formatter function. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 95345f7 commit ceb3eca

4 files changed

Lines changed: 148 additions & 1 deletion

File tree

pkg/formatter/render.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,11 @@ func renderSelect(s *ast.SelectStatement, opts ast.FormatOptions) string {
238238
sb.WriteString(joinSQL(&j))
239239
}
240240

241+
if s.Sample != nil {
242+
sb.WriteString(f.clauseSep())
243+
sb.WriteString(sampleSQL(s.Sample, f))
244+
}
245+
241246
if s.Where != nil {
242247
sb.WriteString(f.clauseSep())
243248
sb.WriteString(f.kw("WHERE"))
@@ -1188,6 +1193,29 @@ func tableRefSQL(t *ast.TableReference) string {
11881193
return sb.String()
11891194
}
11901195

1196+
// sampleSQL renders a ClickHouse SAMPLE clause.
1197+
func sampleSQL(s *ast.SampleClause, f *nodeFormatter) string {
1198+
var sb strings.Builder
1199+
sb.WriteString(f.kw("SAMPLE"))
1200+
sb.WriteString(" ")
1201+
sb.WriteString(s.Value)
1202+
if s.Denominator != "" {
1203+
sb.WriteString("/")
1204+
sb.WriteString(s.Denominator)
1205+
}
1206+
if s.Offset != "" {
1207+
sb.WriteString(" ")
1208+
sb.WriteString(f.kw("OFFSET"))
1209+
sb.WriteString(" ")
1210+
sb.WriteString(s.Offset)
1211+
if s.OffsetDenominator != "" {
1212+
sb.WriteString("/")
1213+
sb.WriteString(s.OffsetDenominator)
1214+
}
1215+
}
1216+
return sb.String()
1217+
}
1218+
11911219
// joinSQL renders a JOIN clause.
11921220
func joinSQL(j *ast.JoinClause) string {
11931221
var sb strings.Builder

pkg/sql/ast/ast.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,8 @@ type SelectStatement struct {
396396
From []TableReference
397397
TableName string // Added for pool operations
398398
Joins []JoinClause
399-
PrewhereClause Expression // ClickHouse PREWHERE clause (applied before WHERE, before reading data)
399+
PrewhereClause Expression // ClickHouse PREWHERE clause (applied before WHERE, before reading data)
400+
Sample *SampleClause // ClickHouse SAMPLE clause (comes after FROM/FINAL, before PREWHERE)
400401
Where Expression
401402
GroupBy []Expression
402403
Having Expression
@@ -2022,3 +2023,32 @@ func (c ConnectByClause) Children() []Node {
20222023
}
20232024
return nil
20242025
}
2026+
2027+
// SampleClause represents a ClickHouse SAMPLE clause on a SELECT statement.
2028+
//
2029+
// ClickHouse supports three sampling forms:
2030+
//
2031+
// SAMPLE 0.1 — ratio (10% of data)
2032+
// SAMPLE 1000 — approximate row count
2033+
// SAMPLE 1/10 — fraction (1 part out of 10)
2034+
// SAMPLE 1/10 OFFSET 2/10 — fraction with offset
2035+
//
2036+
// The clause is dialect-specific to ClickHouse (and partly Snowflake/Redshift
2037+
// via TABLESAMPLE, but this implementation targets SAMPLE).
2038+
// Value is stored as a raw string to preserve the original representation
2039+
// (e.g., "0.1", "1000", "1/10").
2040+
type SampleClause struct {
2041+
// Value is the sampling size/ratio as a raw token string (e.g., "0.1", "1000", "1/10").
2042+
Value string
2043+
// Denominator is set when the fraction form "N/D" is used (denominator part).
2044+
Denominator string
2045+
// Offset is the optional OFFSET fraction (e.g., "2/10" in SAMPLE 1/10 OFFSET 2/10).
2046+
Offset string
2047+
// OffsetDenominator is set for fractional offsets.
2048+
OffsetDenominator string
2049+
Pos models.Location
2050+
}
2051+
2052+
func (s *SampleClause) expressionNode() {}
2053+
func (s SampleClause) TokenLiteral() string { return "SAMPLE" }
2054+
func (s SampleClause) Children() []Node { return nil }

pkg/sql/parser/select.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,13 @@ func (p *Parser) parseSelectStatement() (ast.Statement, error) {
8787
TableName: tableName,
8888
}
8989

90+
// SAMPLE (ClickHouse-specific, specifies sampling rate/size; comes after FROM/FINAL)
91+
if p.dialect == string(keywords.DialectClickHouse) && p.isTokenMatch("SAMPLE") {
92+
if selectStmt.Sample, err = p.parseSampleClause(); err != nil {
93+
return nil, err
94+
}
95+
}
96+
9097
// PREWHERE (ClickHouse-specific, applied before WHERE for early data filtering)
9198
if p.dialect == string(keywords.DialectClickHouse) {
9299
if selectStmt.PrewhereClause, err = p.parsePrewhereClause(); err != nil {

pkg/sql/parser/select_clauses.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,88 @@ func (p *Parser) parseJoinCondition(joinType string, isNatural, isApply bool) (a
301301
return nil, p.expectedError("ON or USING")
302302
}
303303

304+
// parseSampleClause parses the ClickHouse SAMPLE clause that specifies data sampling.
305+
// It is called when the current token is SAMPLE (already verified by caller).
306+
//
307+
// Supported forms:
308+
//
309+
// SAMPLE 0.1 — ratio (floating-point fraction, e.g., 10%)
310+
// SAMPLE 1000 — approximate row count (integer)
311+
// SAMPLE 1/10 — fractional form (numerator/denominator)
312+
// SAMPLE 1/10 OFFSET 2/10 — fractional with an offset fraction
313+
func (p *Parser) parseSampleClause() (*ast.SampleClause, error) {
314+
samplePos := p.currentLocation()
315+
p.advance() // Consume SAMPLE
316+
317+
if p.isType(models.TokenTypeEOF) || p.isType(models.TokenTypeSemicolon) {
318+
return nil, goerrors.ExpectedTokenError(
319+
"sampling size after SAMPLE",
320+
p.currentToken.Token.Type.String(),
321+
p.currentLocation(),
322+
"SAMPLE clause requires a numeric argument",
323+
)
324+
}
325+
326+
clause := &ast.SampleClause{Pos: samplePos}
327+
328+
// Read the primary sampling value (numerator / whole number / float)
329+
if !p.isNumericLiteral() {
330+
return nil, goerrors.ExpectedTokenError(
331+
"numeric literal after SAMPLE",
332+
p.currentToken.Token.Type.String(),
333+
p.currentLocation(),
334+
"SAMPLE clause requires a numeric argument (ratio, row count, or N/D fraction)",
335+
)
336+
}
337+
clause.Value = p.currentToken.Token.Value
338+
p.advance()
339+
340+
// Check for fractional form: SAMPLE N / D
341+
if p.isType(models.TokenTypeDiv) {
342+
p.advance() // consume /
343+
if !p.isNumericLiteral() {
344+
return nil, goerrors.ExpectedTokenError(
345+
"denominator after /",
346+
p.currentToken.Token.Type.String(),
347+
p.currentLocation(),
348+
"SAMPLE N/D fraction requires an integer denominator",
349+
)
350+
}
351+
clause.Denominator = p.currentToken.Token.Value
352+
p.advance()
353+
}
354+
355+
// Optional OFFSET N/D
356+
if p.isTokenMatch("OFFSET") {
357+
p.advance() // Consume OFFSET
358+
if !p.isNumericLiteral() {
359+
return nil, goerrors.ExpectedTokenError(
360+
"numeric literal after SAMPLE ... OFFSET",
361+
p.currentToken.Token.Type.String(),
362+
p.currentLocation(),
363+
"SAMPLE OFFSET requires a numeric argument",
364+
)
365+
}
366+
clause.Offset = p.currentToken.Token.Value
367+
p.advance()
368+
if p.isType(models.TokenTypeDiv) {
369+
p.advance() // consume /
370+
if !p.isNumericLiteral() {
371+
return nil, goerrors.ExpectedTokenError(
372+
"denominator after OFFSET /",
373+
p.currentToken.Token.Type.String(),
374+
p.currentLocation(),
375+
"SAMPLE OFFSET N/D fraction requires an integer denominator",
376+
)
377+
}
378+
clause.OffsetDenominator = p.currentToken.Token.Value
379+
p.advance()
380+
}
381+
}
382+
383+
return clause, nil
384+
}
385+
304386
// parsePrewhereClause parses "PREWHERE <expr>" if present (ClickHouse-specific).
305387
// PREWHERE is a ClickHouse optimisation that filters data blocks before reading
306388
// all columns. It is semantically similar to WHERE but executed earlier in the

0 commit comments

Comments
 (0)