From eb1a8df9351b50a2845dafd925d58f3a1fa8941a Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Tue, 26 Aug 2025 14:40:22 +0800
Subject: [PATCH 1/9] feat: initialize fuzz

---
 .gitignore                                    |   5 +-
 go.mod                                        |   1 +
 go.sum                                        |   2 +
 tools/fuzzing/DESIGN.md                       | 536 ++++++++++++------
 tools/fuzzing/Makefile                        |  59 ++
 tools/fuzzing/README.md                       | 124 ++++
 tools/fuzzing/cmd/fuzzer/main.go              | 108 ++++
 tools/fuzzing/internal/config/config.go       |  93 +++
 tools/fuzzing/internal/generator/generator.go | 217 +++++++
 tools/fuzzing/internal/grammar/discovery.go   | 187 ++++++
 tools/fuzzing/internal/grammar/parser.go      | 259 +++++++++
 11 files changed, 1413 insertions(+), 178 deletions(-)
 create mode 100644 tools/fuzzing/Makefile
 create mode 100644 tools/fuzzing/README.md
 create mode 100644 tools/fuzzing/cmd/fuzzer/main.go
 create mode 100644 tools/fuzzing/internal/config/config.go
 create mode 100644 tools/fuzzing/internal/generator/generator.go
 create mode 100644 tools/fuzzing/internal/grammar/discovery.go
 create mode 100644 tools/fuzzing/internal/grammar/parser.go

diff --git a/.gitignore b/.gitignore
index d262e9c..a5a239e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,4 +44,7 @@ go.work.sum
 # node_modules
 **/node_modules/
 
-**/*.class
\ No newline at end of file
+**/*.class
+
+# No binary files
+**/bin/**
\ No newline at end of file
diff --git a/go.mod b/go.mod
index ff4a681..9e2614c 100644
--- a/go.mod
+++ b/go.mod
@@ -9,6 +9,7 @@ require (
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/pkg/errors v0.9.1
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/go.sum b/go.sum
index c96f3a5..7367760 100644
--- a/go.sum
+++ b/go.sum
@@ -2,6 +2,8 @@ github.com/bytebase/antlr/v4 v4.0.0-20240827034948-8c385f108920 h1:IfmPt5o5R70NK
 github.com/bytebase/antlr/v4 v4.0.0-20240827034948-8c385f108920/go.mod h1:ykhjIPiv0IWpu3OGXCHdz2eUSe8UNGGD6baqjs8jSuU=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
diff --git a/tools/fuzzing/DESIGN.md b/tools/fuzzing/DESIGN.md
index d5fc9c0..e78484f 100644
--- a/tools/fuzzing/DESIGN.md
+++ b/tools/fuzzing/DESIGN.md
@@ -2,229 +2,411 @@
 
 ## Overview
 
-A fuzzing tool that generates valid SQL inputs by analyzing ANTLR v4 grammar files, ensuring comprehensive parser testing with syntactically correct queries that can stress-test parsing performance and correctness.
+A simple fuzzing tool that generates SQL inputs from ANTLR grammar rules to test parser performance on specific constructs.
 
-## Goals
+## Core Problems & Solutions
 
-- **Valid Input Generation**: Generate syntactically correct SQL queries based on grammar rules
-- **Performance Testing**: Create complex queries to test parser performance limits  
-- **Coverage Maximization**: Exercise all grammar rules and edge cases
-- **Automated Testing**: Integrate with CI for continuous parser validation
+### 1. Target Specific Rules
+**Problem**: Performance issues often occur in specific rules (e.g., `createProcedureStatement`)
+**Solution**: Allow users to specify starting rule chains
 
-## Architecture
+```bash
+./fuzzer --grammar postgresql --start-rule createProcedureStatement --count 100
+./fuzzer --grammar cql --start-rule selectStatement.whereClause --count 50
+```
+
+### 2. Recursion Control  
+**Problem**: Grammar rules can be recursive, causing infinite loops during generation
+**Solution**: Limit recursion depth per rule (proven to handle all ANTLR recursion types)
 
+#### ANTLR 4 Recursion Types
+
+**Direct Left Recursion:**
+```antlr
+expr: expr '+' expr | INT    // expr directly refers to itself on left
 ```
-tools/fuzzing/
-├── generator/           # Core generation logic
-│   ├── grammar_analyzer.go    # Parse ANTLR grammar files
-│   ├── rule_expander.go       # Expand grammar rules to concrete syntax
-│   └── query_builder.go       # Build SQL queries from rule expansions
-├── strategies/          # Different generation strategies
-│   ├── depth_first.go         # Generate deeply nested structures
-│   ├── breadth_first.go       # Generate wide, complex queries
-│   └── weighted.go            # Probability-based rule selection
-├── corpus/              # Generated test cases and seeds
-│   ├── seeds/                 # Hand-crafted seed inputs
-│   └── generated/             # Auto-generated test cases
-└── cmd/                 # CLI tools
-    └── fuzzer/               # Main fuzzer executable
-```
-
-## Core Components
-
-### 1. Grammar Analyzer
-
-Leverages the existing `tools/grammar/` ANTLR v4 parser to:
-- Parse target grammar files (e.g., `postgresql.g4`, `cql.g4`) 
-- Extract production rules and their alternatives
-- Build dependency graph between rules
-- Identify terminal vs non-terminal symbols
+
+**Direct Right Recursion:**
+```antlr
+expr: INT '+' expr | INT     // expr directly refers to itself on right  
+```
+
+**Indirect Recursion (Non-Left):**
+```antlr
+selectStmt: SELECT columns fromClause whereClause?
+whereClause: WHERE expr
+expr: '(' selectStmt ')' | INT   // Indirect: expr -> selectStmt -> whereClause -> expr
+```
+*Note: ANTLR 4 does NOT support mutually left recursive grammars. This example is valid because the recursion is not left-recursive (selectStmt doesn't start with selectStmt).*
+
+**Self-Recursion with Alternatives:**
+```antlr
+stmt: ifStmt | whileStmt | blockStmt
+blockStmt: '{' stmt* '}'         // blockStmt contains multiple stmt references
+```
+
+#### Why Depth Control Works
+
+**Theorem**: Any grammar rule expansion terminates in finite steps with depth limiting.
+
+**Proof by Contradiction:**
+1. Assume infinite expansion despite depth limit `D`
+2. Each recursive call increases depth: `depth(rule_n) = depth(rule_{n-1}) + 1`
+3. When `depth ≥ D`, generator forces terminal selection
+4. Therefore, maximum expansion depth is bounded by `D`
+5. Since each rule has finite alternatives and finite elements, total expansion is finite ∎
+
+#### Depth Control Implementation
 
 ```go
-type GrammarAnalyzer struct {
-    parser *grammar.ANTLRv4Parser
-    rules  map[string]*Rule
+func (g *Generator) GenerateFromRule(ruleName string, currentDepth int) string {
+    // Base case: exceed depth limit -> force terminal
+    if currentDepth >= g.maxDepth {
+        return g.forceTerminal(ruleName)
+    }
+    
+    rule := g.grammar.GetRule(ruleName)
+    
+    // Prefer non-recursive alternatives as depth increases
+    alternative := g.selectAlternativeWithDepthBias(rule, currentDepth)
+    
+    result := ""
+    for _, element := range alternative {
+        if element.IsRule() {
+            // Recursive call with incremented depth
+            result += g.GenerateFromRule(element.Name, currentDepth+1)
+        } else {
+            result += element.Literal
+        }
+    }
+    return result
 }
 
-type Rule struct {
-    Name         string
-    Alternatives []Alternative
-    Type         RuleType // LEXER, PARSER, FRAGMENT
+func (g *Generator) forceTerminal(ruleName string) string {
+    rule := g.grammar.GetRule(ruleName)
+    
+    // Find non-recursive alternatives (containing only terminals)
+    for _, alt := range rule.Alternatives {
+        if !alt.ContainsRecursion() {
+            return g.expandAlternative(alt, g.maxDepth)
+        }
+    }
+    
+    // Fallback: use default terminal for this rule type
+    return g.getDefaultTerminal(ruleName)
 }
 ```
 
-### 2. Rule Expander
+#### Examples with Depth Control
+
+```bash
+./fuzzer --start-rule expr --max-depth 3 --count 5
+```
+
+**Generated sequences:**
+- Depth 0: `INT` (terminal)
+- Depth 1: `INT + INT` 
+- Depth 2: `(INT + INT) + INT`
+- Depth 3: `((INT + INT) + INT) + INT` (max depth reached)
 
-Recursively expands grammar rules into concrete syntax trees:
-- Handles rule recursion with configurable depth limits
-- Supports probability-weighted alternative selection  
-- Manages lexer rules and literal generation
-- Tracks generation context for smart decisions
+**Complex mutual recursion:**
+```bash  
+./fuzzer --start-rule selectStmt --max-depth 4 --count 3
+```
 
-```go
-type RuleExpander struct {
-    grammar    *ParsedGrammar
-    maxDepth   int
-    weights    map[string]float64
-    random     *rand.Rand
-}
+**Expansion trace:**
+```
+selectStmt (depth=0)
+├── SELECT columns FROM table whereClause (depth=0)
+    └── whereClause (depth=1)  
+        └── WHERE expr (depth=1)
+            └── '(' selectStmt ')' (depth=2)
+                └── selectStmt (depth=2)
+                    └── SELECT columns FROM table (depth=2, no whereClause to avoid depth=4)
 ```
 
-### 3. Query Builder
+#### Depth Strategy Options
 
-Converts syntax trees to executable SQL strings:
-- Handles whitespace and formatting
-- Manages identifier generation (table names, columns)
-- Ensures semantic consistency where possible
-- Outputs parseable query strings
+**Conservative (Early Termination):**
+- Lower max depth (3-5)
+- Bias toward terminals as depth increases
+- Prevents deep nesting, faster generation
 
-## Generation Strategies
+**Aggressive (Deep Testing):**  
+- Higher max depth (10-15)
+- Equal probability until max depth
+- Tests parser limits, slower generation
 
-### Depth-First Strategy
-- Generates deeply nested subqueries, expressions
-- Tests parser stack limits and recursion handling
-- Focuses on structural complexity
+```bash
+# Conservative - quick, shallow testing
+./fuzzer --start-rule expr --max-depth 3 --depth-strategy conservative
 
-### Breadth-First Strategy  
-- Creates wide queries with many clauses, joins, columns
-- Tests parser memory usage and performance
-- Focuses on query size and breadth
+# Aggressive - deep parser stress testing  
+./fuzzer --start-rule createProcedureStmt --max-depth 12 --depth-strategy aggressive
+```
 
-### Weighted Strategy
-- Uses probability weights for rule selection
-- Biases toward commonly used constructs
-- Configurable via weight files per dialect
+### 3. Optional Rule Probability
+**Problem**: Optional rules (`selectStmt: SELECT columns FROM table whereClause?`) need probability control
+**Solution**: Configure probability for optional elements (standard in grammar-based fuzzing)
 
-## Integration Points
+### 4. Quantified Rule Generation
+**Problem**: Quantified rules (`stmt*`, `expr+`, `column{1,5}`) need count control
+**Solution**: Configure generation counts for quantified elements
 
-### With Existing Grammar Parser
-```go
-// Reuse tools/grammar/ for parsing target grammars
-analyzer := NewGrammarAnalyzer()
-targetGrammar, err := analyzer.ParseGrammarFile("postgresql/PostgreSQLLexer.g4")
+#### ANTLR 4 Quantifier Types
+
+**Zero or More (`rule*`):**
+```antlr
+blockStmt: '{' stmt* '}'        // Generate 0 to N statements
+selectList: column (',' column)*  // Generate 1 to N columns
+```
+
+**One or More (`rule+`):**  
+```antlr
+identifier: LETTER (LETTER | DIGIT)+  // Generate 1 to N characters
+```
+
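+**Exact Count (`rule{n}`)** — not native ANTLR 4 syntax (ANTLR 4 only offers `?`, `*`, `+`, and reads `{...}` as an embedded action); a generator-level extension: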
+```antlr
+hexDigit: HEX_DIGIT{4}         // Generate exactly 4 hex digits
+```
+
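+**Range Count (`rule{min,max}`)** — not native ANTLR 4 syntax (ANTLR 4 only offers `?`, `*`, `+`, and reads `{...}` as an embedded action); a generator-level extension: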
+```antlr
+varchar: CHAR{1,255}           // Generate 1 to 255 characters
+```
+
+#### Quantifier Control Strategy
+
+**Count Distribution Options:**
+- **Uniform**: Equal probability for each count in range
+- **Exponential**: Higher probability for lower counts (realistic)  
+- **Fixed**: Always generate specific count
+
+```bash
+# Basic usage - user specifies max count
+./fuzzer --start-rule blockStmt --max-quantifier 10 --count 100
+
+# User controls both min and max for quantifiers  
+./fuzzer --start-rule selectList --min-quantifier 1 --max-quantifier 5 --count 50
+
+# Fixed count for performance testing
+./fuzzer --start-rule selectStmt --quantifier-count 100 --count 10
 ```
 
-### With Parser Testing
+#### Implementation Logic
+
 ```go
-// Generate test cases for specific parser
-fuzzer := NewFuzzer(postgresqlGrammar)
-queries := fuzzer.GenerateQueries(1000)
-
-for _, query := range queries {
-    // Test against postgresql parser
-    result := postgresqlParser.Parse(query)
-    // Collect metrics, detect crashes
+type QuantifierConfig struct {
+    Strategy   string // "uniform", "exponential", "fixed"
+    MinRepeat  int    // Minimum repetitions (overrides grammar min)
+    MaxRepeat  int    // Maximum repetitions (overrides grammar max)  
+    FixedCount int    // Fixed count for "fixed" strategy
+}
+
+func (g *Generator) generateQuantified(element *GrammarElement, config QuantifierConfig) string {
+    var count int
+    
+    switch element.Quantifier {
+    case "*": // Zero or more
+        lo := max(0, config.MinRepeat)
+        hi := min(config.MaxRepeat, 50) // Reasonable default limit
+        count = g.selectCount(lo, hi, config.Strategy)
+        
+    case "+": // One or more  
+        lo := max(1, config.MinRepeat)
+        hi := min(config.MaxRepeat, 50)
+        count = g.selectCount(lo, hi, config.Strategy)
+        
+    case "{n}": // Exact count
+        if config.Strategy == "fixed" {
+            count = config.FixedCount
+        } else {
+            count = element.ExactCount
+        }
+        
+    case "{min,max}": // Range
+        lo := max(element.MinCount, config.MinRepeat)
+        hi := min(element.MaxCount, config.MaxRepeat)
+        count = g.selectCount(lo, hi, config.Strategy)
+    }
+    
+    result := ""
+    for i := 0; i < count; i++ {
+        if element.IsRule() {
+            result += g.GenerateFromRule(element.RuleName, g.currentDepth+1)
+        } else {
+            result += element.Literal
+        }
+        
+        // Add separators for lists (e.g., comma-separated)
+        if i < count-1 && element.HasSeparator() {
+            result += element.Separator
+        }
+    }
+    return result
+}
+
+func (g *Generator) selectCount(min, max int, strategy string) int {
+    if min > max {
+        return min
+    }
+    
+    switch strategy {
+    case "fixed":
+        return min // Use minimum as fixed value
+        
+    case "uniform":
+        return min + g.random.Intn(max-min+1)
+        
+    case "exponential":
+        // Exponential decay: higher probability for lower counts
+        rangeSize := max - min + 1
+        // Generate exponentially distributed number, then map to range
+        lambda := 2.0 / float64(rangeSize)
+        expVal := g.random.ExpFloat64() / lambda
+        count := min + int(expVal)
+        if count > max {
+            count = max
+        }
+        return count
+        
+    default:
+        return min + g.random.Intn(max-min+1)
+    }
 }
 ```
 
-## Configuration
-
-### Fuzzer Config
-```yaml
-target_grammar: "postgresql"
-strategies:
-  - name: "depth_first"
-    weight: 0.3
-    max_depth: 15
-  - name: "breadth_first" 
-    weight: 0.4
-    max_width: 50
-  - name: "weighted"
-    weight: 0.3
-    weights_file: "postgresql_weights.yaml"
-
-generation:
-  count: 10000
-  max_query_length: 100000
-  seed: 42
-
-output:
-  format: "sql"
-  directory: "corpus/generated"
-```
-
-### Grammar Weights
-```yaml
-# postgresql_weights.yaml
-rules:
-  selectStmt: 0.4
-  insertStmt: 0.2  
-  updateStmt: 0.2
-  deleteStmt: 0.1
-  createStmt: 0.1
-  
-  # Bias toward complex expressions
-  expr:
-    binaryOp: 0.4
-    functionCall: 0.3
-    subquery: 0.2
-    literal: 0.1
+#### Examples with Quantifier Control
+
+**Block statement with multiple statements:**
+```bash
+./fuzzer --start-rule blockStmt --quantifier-strategy exponential --max-repeat 8
 ```
+**Generated:**
+- 70% chance: `{ stmt; }` (1 statement)
+- 20% chance: `{ stmt; stmt; }` (2 statements)  
+- 7% chance: `{ stmt; stmt; stmt; }` (3 statements)
+- 3% chance: 4+ statements
+
+**Column list generation:**
+```bash  
+./fuzzer --start-rule selectList --quantifier-strategy uniform --min-repeat 3 --max-repeat 7
+```
+**Generated:**
+- Equal probability: `col1, col2, col3` to `col1, col2, col3, col4, col5, col6, col7`
 
-## CLI Interface
+**Performance testing with large lists:**
+```bash
+./fuzzer --start-rule selectStmt --quantifier-count 100 --count 5
+```
+**Generated:**
+- Always generates exactly 100 columns to test parser performance on large SELECT lists
 
+**Simple user control:**
 ```bash
-# Generate queries for PostgreSQL
-./fuzzer generate --grammar postgresql --count 1000 --strategy weighted
+./fuzzer --start-rule blockStmt --max-quantifier 3 --count 10
+```
+**Generated:**
+- `stmt*` generates 0-3 statements
+- `expr+` generates 1-3 expressions  
+- User controls maximum without complex strategy options
 
-# Run continuous fuzzing with performance metrics
-./fuzzer fuzz --grammar cql --duration 1h --metrics
+```bash
+./fuzzer --start-rule selectStmt --optional-prob 0.7 --count 100
+# 70% chance to include optional whereClause
+```
+
+## Simple Architecture
 
-# Validate existing corpus against parser
-./fuzzer validate --grammar postgresql --corpus corpus/postgresql/
+```
+tools/fuzzing/
+├── cmd/fuzzer/main.go               # CLI entry point
+├── internal/generator/generator.go  # Core generation logic
+└── internal/grammar/parser.go       # Reuse tools/grammar/
 ```
 
-## Performance Metrics
+## Core Logic
 
-### Generation Metrics
-- Queries generated per second
-- Grammar rule coverage percentage
-- Distribution of query complexity (depth, width)
+```go
+type Generator struct {
+    grammar     *ParsedGrammar
+    maxDepth    int
+    optionalProb float64
+    random      *rand.Rand
+}
 
-### Parser Testing Metrics  
-- Parse success rate
-- Average parse time per query
-- Memory usage during parsing
-- Parser crash/error detection
+func (g *Generator) GenerateFromRule(ruleName string, currentDepth int) string {
+    if currentDepth >= g.maxDepth {
+        return g.forceTerminal(ruleName) // Stop recursion
+    }
+    
+    rule := g.grammar.GetRule(ruleName)
+    alternative := g.selectAlternative(rule)
+    
+    result := ""
+    for _, element := range alternative {
+        if element.IsOptional() && g.random.Float64() > g.optionalProb {
+            continue // Skip optional element
+        }
+        if element.IsRule() {
+            result += g.GenerateFromRule(element.Name, currentDepth+1)
+        } else {
+            result += element.Literal
+        }
+    }
+    return result
+}
+```
+
+## CLI Interface
+
+```bash
+# Basic usage - generate from specific rule
+./fuzzer --grammar postgresql --start-rule selectStmt --count 10
+
+# Control recursion depth  
+./fuzzer --grammar cql --start-rule expr --max-depth 3 --count 5
 
-## Implementation Phases
+# Control optional probability
+./fuzzer --grammar postgresql --start-rule createStmt --optional-prob 0.8 --count 10
 
-### Phase 1: Foundation (Week 1-2)
-- Basic grammar analyzer using existing ANTLR parser
-- Simple rule expander with depth-first strategy
-- Command-line interface for manual testing
+# Control quantifier max count (for rule*, rule+)
+./fuzzer --grammar postgresql --start-rule blockStmt --max-quantifier 8 --count 20
 
-### Phase 2: Core Features (Week 3-4)
-- Multiple generation strategies
-- Configuration system
-- Basic corpus management
-- Integration with existing parser tests
+# Control all parameters together
+./fuzzer --grammar cql --start-rule selectStmt \
+  --max-depth 5 \
+  --optional-prob 0.7 \
+  --max-quantifier 10 \
+  --count 50
+
+# Output to file
+./fuzzer --grammar postgresql --start-rule selectStmt --count 100 --output queries.sql
+```
 
-### Phase 3: Advanced Features (Week 5-6)
-- Weighted generation with probability tuning
-- Performance metrics collection
-- CI integration for continuous fuzzing
-- Corpus minimization and deduplication
+## Implementation Steps
 
-### Phase 4: Optimization (Week 7-8)
-- Generation performance optimization
-- Advanced semantic awareness
-- Custom mutation strategies
-- Comprehensive documentation
+### Step 1: Basic Generator
+- Parse grammar using existing `tools/grammar/`
+- Simple rule expansion with depth limit
+- CLI with `--start-rule`, `--max-depth`, `--count`
 
-## Future Enhancements
+### Step 2: Optional Control  
+- Add `--optional-prob` flag
+- Detect optional elements in grammar rules
+- Apply probability during generation
 
-- **Semantic Awareness**: Generate queries with valid schema references
-- **Mutation-Based Fuzzing**: Mutate existing queries to explore edge cases
-- **Differential Testing**: Compare parser outputs across database dialects
-- **Performance Regression Detection**: Track parser performance over time
-- **Grammar Evolution**: Adapt fuzzing as grammars evolve
+### Step 3: Integration
+- Test generated queries against parsers
+- Add basic performance timing
+- CI integration for regression testing
 
-## Dependencies
+## Common Fuzzing Techniques Used
 
-- Existing `tools/grammar/` ANTLR v4 parser
-- Go standard library (`rand`, `fmt`, `strings`)
-- YAML configuration parsing
-- CLI framework (e.g., `cobra`)
+1. **Grammar-based generation** - Generate from formal grammar rules
+2. **Depth limiting** - Prevent infinite recursion in recursive grammars  
+3. **Probability-based selection** - Control optional rule inclusion
+4. **Targeted fuzzing** - Focus on specific rule paths instead of full grammar
 
-This design provides a solid foundation for grammar-aware fuzzing while leveraging our existing ANTLR infrastructure.
\ No newline at end of file
+This approach is much simpler, yet it still covers the core need: testing parser performance on specific constructs.
\ No newline at end of file
diff --git a/tools/fuzzing/Makefile b/tools/fuzzing/Makefile
new file mode 100644
index 0000000..20503f2
--- /dev/null
+++ b/tools/fuzzing/Makefile
@@ -0,0 +1,59 @@
+BINARY_NAME=fuzzer
+BUILD_DIR=bin
+CMD_PATH=github.com/bytebase/parser/tools/fuzzing/cmd/fuzzer
+
+.PHONY: all build test clean run deps fmt lint help
+
+all: build test
+
+# Build the binary
+build:
+	@echo "Building $(BINARY_NAME)..."
+	@mkdir -p $(BUILD_DIR)
+	go build -o $(BUILD_DIR)/$(BINARY_NAME) $(CMD_PATH)
+
+# Run tests
+test:
+	@echo "Running tests..."
+	go test -v github.com/bytebase/parser/tools/fuzzing/...
+
+# Clean build artifacts  
+clean:
+	@echo "Cleaning..."
+	rm -rf $(BUILD_DIR)
+	go clean
+
+# Run the fuzzer (requires arguments)
+run:
+	go run $(CMD_PATH) $(ARGS)
+
+# Install dependencies
+deps:
+	@echo "Installing dependencies..."
+	cd ../.. && go mod tidy && go mod download
+
+# Format code
+fmt:
+	@echo "Formatting code..."
+	go fmt github.com/bytebase/parser/tools/fuzzing/...
+
+# Run linter
+lint:
+	@echo "Running linter..."
+	golangci-lint run
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  build    - Build the fuzzer binary"
+	@echo "  test     - Run all tests"
+	@echo "  clean    - Clean build artifacts"
+	@echo "  run      - Run the fuzzer (use ARGS='--grammar postgresql --start-rule selectStmt')"
+	@echo "  deps     - Install/update dependencies"
+	@echo "  fmt      - Format all Go code"
+	@echo "  lint     - Run golangci-lint"
+	@echo "  help     - Show this help message"
+	@echo ""
+	@echo "Examples:"
+	@echo "  make run ARGS='--grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 5'"
+	@echo "  make run ARGS='--help'"
\ No newline at end of file
diff --git a/tools/fuzzing/README.md b/tools/fuzzing/README.md
new file mode 100644
index 0000000..64f4409
--- /dev/null
+++ b/tools/fuzzing/README.md
@@ -0,0 +1,124 @@
+# Grammar-Aware Fuzzing Tool
+
+A fuzzing tool that generates valid SQL inputs from ANTLR v4 grammar files for parser testing.
+
+## Quick Start
+
+```bash
+# Build the fuzzer
+make build
+
+# List available grammars
+./bin/fuzzer --list-grammars
+
+# Single combined grammar file
+./bin/fuzzer --grammar combined.g4 --start-rule selectStmt --count 10
+
+# Separate lexer and parser files
+./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 10
+
+# Run with custom parameters  
+./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 3 --max-quantifier 8 --count 5
+```
+
+## Project Structure
+
+```
+tools/fuzzing/
+├── cmd/fuzzer/          # CLI application entry point
+│   └── main.go
+├── internal/            # Private application packages
+│   ├── config/          # Configuration management
+│   ├── generator/       # Core fuzzing logic
+│   └── grammar/         # Grammar discovery and parsing
+├── bin/                 # Built binaries (created by make build)
+└── Makefile             # Build and development tasks
+```
+
+## CLI Options
+
+| Flag | Description | Default |
+|------|-------------|---------|
+| `--grammar` | Grammar file(s): single file or comma-separated lexer,parser (required) | - |
+| `--start-rule` | Starting grammar rule (required) | - |
+| `--count` | Number of queries to generate | 10 |
+| `--max-depth` | Maximum recursion depth | 5 |
+| `--optional-prob` | Probability of optional elements (0.0-1.0) | 0.5 |
+| `--max-quantifier` | Maximum count for `*` and `+` rules | 5 |
+| `--min-quantifier` | Minimum count override | 0 |
+| `--quantifier-count` | Fixed count for all quantifiers | 0 |
+| `--output` | Output file path | stdout |
+| `--seed` | Random seed for reproducible results | current time |
+
+## Examples
+
+### Basic Usage
+```bash
+# Single combined grammar file
+./bin/fuzzer --grammar combined.g4 --start-rule selectStmt --count 10
+
+# Separate lexer and parser files
+./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 10
+
+# Generate CQL expressions with limited depth
+./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 3 --count 5
+```
+
+### Performance Testing
+```bash
+# Generate queries with exactly 100 columns
+./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --quantifier-count 100 --count 5
+
+# Generate deeply nested expressions  
+./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 15 --count 10
+```
+
+### Output Control
+```bash
+# Save to file
+./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 100 --output queries.sql
+
+# Reproducible generation
+./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --seed 42 --count 10
+```
+
+## Development
+
+### Build Commands
+```bash
+# From tools/fuzzing directory
+make build    # Build binary to bin/fuzzer
+make test     # Run all tests  
+make clean    # Clean build artifacts
+make fmt      # Format code
+make deps     # Install/update dependencies (runs from repo root)
+
+# From repository root
+go build -o tools/fuzzing/bin/fuzzer github.com/bytebase/parser/tools/fuzzing/cmd/fuzzer
+```
+
+### Running During Development
+```bash
+# From tools/fuzzing directory
+make run ARGS='--grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 5'
+make run ARGS='--help'
+
+# From repository root
+go run github.com/bytebase/parser/tools/fuzzing/cmd/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 5
+```
+
+## Monolithic Repository Structure
+
+This tool uses the single `go.mod` file at the repository root:
+- **Module**: `github.com/bytebase/parser`
+- **Import path**: `github.com/bytebase/parser/tools/fuzzing/...`
+- **Dependencies**: Shared with other tools in the repository
+
+## Integration
+
+This tool is designed to integrate with:
+- Existing ANTLR v4 grammar parser at `tools/grammar/`
+- All parser implementations in the repository (postgresql, cql, redshift, etc.)
+- Shared CI/CD pipeline and testing infrastructure
+
+**TODO**: Grammar parser integration and actual query generation logic.
\ No newline at end of file
diff --git a/tools/fuzzing/cmd/fuzzer/main.go b/tools/fuzzing/cmd/fuzzer/main.go
new file mode 100644
index 0000000..dd62d7e
--- /dev/null
+++ b/tools/fuzzing/cmd/fuzzer/main.go
@@ -0,0 +1,108 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/bytebase/parser/tools/fuzzing/internal/config"
+	"github.com/bytebase/parser/tools/fuzzing/internal/generator"
+	"github.com/bytebase/parser/tools/fuzzing/internal/grammar"
+)
+
+// main parses and validates the command-line flags, echoes the configuration,
+// and runs the generator. Any failure is reported on stderr with exit code 1.
+func main() {
+	cfg := parseFlags()
+	if err := cfg.Validate(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+		os.Exit(1)
+	}
+	cfg.Print()
+
+	gen := generator.New(cfg)
+	if err := gen.Generate(); err != nil {
+		fmt.Fprintf(os.Stderr, "Generation failed: %v\n", err)
+		os.Exit(1)
+	}
+	// Keep the status line off stdout, the default destination for queries (--output).
+	fmt.Fprintln(os.Stderr, "Generation completed successfully!")
+}
+
+// parseFlags registers the CLI flags, parses os.Args, and returns the resulting
+// configuration. With --list-grammars it prints the discovered grammars and
+// exits instead of returning. Validation is left to the caller.
+func parseFlags() *config.Config {
+	cfg := &config.Config{}
+	var listGrammars bool
+	var grammarArg string
+
+	flag.StringVar(&grammarArg, "grammar", "", "Grammar file(s): single file or comma-separated lexer,parser files")
+	flag.StringVar(&cfg.StartRule, "start-rule", "", "Starting grammar rule name")
+	flag.IntVar(&cfg.Count, "count", 10, "Number of queries to generate")
+	flag.IntVar(&cfg.MaxDepth, "max-depth", 5, "Maximum recursion depth")
+	flag.Float64Var(&cfg.OptionalProb, "optional-prob", 0.5, "Probability of including optional elements (0.0-1.0)")
+	flag.IntVar(&cfg.MaxQuantifier, "max-quantifier", 5, "Maximum count for quantified rules (* and +)")
+	flag.IntVar(&cfg.MinQuantifier, "min-quantifier", 0, "Minimum count for quantified rules (overrides grammar)")
+	flag.IntVar(&cfg.QuantifierCount, "quantifier-count", 0, "Fixed count for all quantifiers (overrides min/max)")
+	flag.StringVar(&cfg.Output, "output", "", "Output file path (default: stdout)")
+	flag.Int64Var(&cfg.Seed, "seed", time.Now().UnixNano(), "Random seed for reproducible generation")
+	flag.BoolVar(&listGrammars, "list-grammars", false, "List all available grammars and exit")
+
+	// Custom usage message
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Grammar-Aware Fuzzing Tool\n\n")
+		fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "Options:\n")
+		flag.PrintDefaults()
+
+		fmt.Fprintf(os.Stderr, "\nExamples:\n")
+		fmt.Fprintf(os.Stderr, "  # List available grammars\n")
+		fmt.Fprintf(os.Stderr, "  %s --list-grammars\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  # Single combined grammar file\n")
+		fmt.Fprintf(os.Stderr, "  %s --grammar combined.g4 --start-rule selectStmt --count 10\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  # Separate lexer and parser files\n")
+		fmt.Fprintf(os.Stderr, "  %s --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 10\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  # Control recursion and quantifiers\n")
+		fmt.Fprintf(os.Stderr, "  %s --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 3 --max-quantifier 8 --count 5\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  # Performance testing\n")
+		fmt.Fprintf(os.Stderr, "  %s --grammar redshift/RedshiftLexer.g4,redshift/RedshiftParser.g4 --start-rule blockStmt --quantifier-count 100 --count 10\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  # Output to file\n")
+		fmt.Fprintf(os.Stderr, "  %s --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 100 --output queries.sql\n\n", os.Args[0])
+	}
+
+	flag.Parse()
+
+	// Handle --list-grammars
+	if listGrammars {
+		grammars, err := grammar.ListAvailableGrammars()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error listing grammars: %v\n", err)
+			os.Exit(1)
+		}
+
+		fmt.Println("Available grammars:")
+		for _, g := range grammars {
+			files, err := grammar.DiscoverGrammarFiles(g)
+			if err != nil {
+				fmt.Printf("  %s (error: %v)\n", g, err)
+				continue
+			}
+			fmt.Printf("  %s\n", g)
+			fmt.Printf("    Lexer:  %s\n", files.LexerFile)
+			fmt.Printf("    Parser: %s\n", files.ParserFile)
+		}
+		os.Exit(0)
+	}
+
+	// Split the comma-separated --grammar value, dropping blank entries.
+	for _, file := range strings.Split(grammarArg, ",") {
+		if file = strings.TrimSpace(file); file != "" {
+			cfg.GrammarFiles = append(cfg.GrammarFiles, file)
+		}
+	}
+
+	return cfg
+}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/config/config.go b/tools/fuzzing/internal/config/config.go
new file mode 100644
index 0000000..d976e37
--- /dev/null
+++ b/tools/fuzzing/internal/config/config.go
@@ -0,0 +1,93 @@
+package config
+
+import (
+	"fmt"
+
+	"github.com/pkg/errors"
+)
+
+// Config holds all configuration options for the fuzzer
+type Config struct {
+	GrammarFiles    []string // Can be one file (combined) or two files (lexer,parser)
+	StartRule       string
+	Count           int
+	MaxDepth        int
+	OptionalProb    float64
+	MaxQuantifier   int
+	MinQuantifier   int
+	QuantifierCount int
+	Output          string
+	Seed            int64
+}
+
+// Validate checks if the configuration is valid
+func (c *Config) Validate() error {
+	if len(c.GrammarFiles) == 0 {
+		return errors.New("--grammar is required")
+	}
+
+	if len(c.GrammarFiles) > 2 {
+		return errors.New("--grammar accepts maximum 2 files (lexer,parser)")
+	}
+
+	if c.StartRule == "" {
+		return errors.New("--start-rule is required")
+	}
+
+	if c.Count <= 0 {
+		return errors.New("--count must be positive")
+	}
+
+	if c.MaxDepth <= 0 {
+		return errors.New("--max-depth must be positive")
+	}
+
+	if c.OptionalProb < 0.0 || c.OptionalProb > 1.0 {
+		return errors.New("--optional-prob must be between 0.0 and 1.0")
+	}
+
+	if c.MaxQuantifier <= 0 {
+		return errors.New("--max-quantifier must be positive")
+	}
+
+	if c.MinQuantifier < 0 {
+		return errors.New("--min-quantifier must be non-negative")
+	}
+
+	if c.MinQuantifier > c.MaxQuantifier {
+		return errors.New("--min-quantifier cannot be greater than --max-quantifier")
+	}
+
+	if c.QuantifierCount < 0 {
+		return errors.New("--quantifier-count must be non-negative")
+	}
+
+	return nil
+}
+
+// Print displays the configuration
+func (c *Config) Print() {
+	fmt.Printf("Grammar-Aware Fuzzer\n")
+	if len(c.GrammarFiles) == 1 {
+		fmt.Printf("Grammar File: %s\n", c.GrammarFiles[0])
+	} else if len(c.GrammarFiles) == 2 {
+		fmt.Printf("Lexer File: %s\n", c.GrammarFiles[0])
+		fmt.Printf("Parser File: %s\n", c.GrammarFiles[1])
+	}
+	fmt.Printf("Start Rule: %s\n", c.StartRule)
+	fmt.Printf("Count: %d\n", c.Count)
+	fmt.Printf("Max Depth: %d\n", c.MaxDepth)
+	fmt.Printf("Optional Probability: %.2f\n", c.OptionalProb)
+	fmt.Printf("Max Quantifier: %d\n", c.MaxQuantifier)
+	if c.MinQuantifier > 0 {
+		fmt.Printf("Min Quantifier: %d\n", c.MinQuantifier)
+	}
+	if c.QuantifierCount > 0 {
+		fmt.Printf("Fixed Quantifier Count: %d\n", c.QuantifierCount)
+	}
+	if c.Output != "" {
+		fmt.Printf("Output: %s\n", c.Output)
+	}
+	fmt.Printf("Seed: %d\n", c.Seed)
+	fmt.Println()
+}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go
new file mode 100644
index 0000000..648cd77
--- /dev/null
+++ b/tools/fuzzing/internal/generator/generator.go
@@ -0,0 +1,217 @@
+package generator
+
+import (
+	"fmt"
+	"math/rand"
+
+	"github.com/bytebase/parser/tools/fuzzing/internal/config"
+	"github.com/bytebase/parser/tools/fuzzing/internal/grammar"
+	"github.com/pkg/errors"
+)
+
+// Generator handles the fuzzing logic
+type Generator struct {
+	config   *config.Config
+	random   *rand.Rand
+	grammars []*grammar.ParsedGrammar
+}
+
+// New creates a new generator with the given configuration
+func New(cfg *config.Config) *Generator {
+	return &Generator{
+		config: cfg,
+		random: rand.New(rand.NewSource(cfg.Seed)),
+	}
+}
+
+// Generate produces the specified number of queries
+func (g *Generator) Generate() error {
+	fmt.Println("Initializing grammar parser...")
+	
+	// Parse all grammar files
+	g.grammars = make([]*grammar.ParsedGrammar, len(g.config.GrammarFiles))
+	for i, filePath := range g.config.GrammarFiles {
+		parsedGrammar, err := grammar.ParseGrammarFile(filePath)
+		if err != nil {
+			return errors.Wrapf(err, "failed to parse grammar file %s", filePath)
+		}
+		g.grammars[i] = parsedGrammar
+		fmt.Printf("Parsed grammar file: %s\n", filePath)
+	}
+
+	// Validate start rule exists
+	if !g.hasRule(g.config.StartRule) {
+		return errors.Errorf("start rule '%s' not found in any grammar file", g.config.StartRule)
+	}
+
+	fmt.Printf("Generating %d queries from rule '%s'...\n", g.config.Count, g.config.StartRule)
+	
+	// Generate queries
+	for i := 0; i < g.config.Count; i++ {
+		query := g.generateQuery(i + 1)
+		fmt.Printf("Query %d: %s\n", i+1, query)
+	}
+
+	return nil
+}
+
+// hasRule checks if a rule exists in any of the parsed grammars
+func (g *Generator) hasRule(ruleName string) bool {
+	for _, grammar := range g.grammars {
+		if grammar.GetRule(ruleName) != nil {
+			return true
+		}
+	}
+	return false
+}
+
+// getRule gets a rule from any of the parsed grammars
+func (g *Generator) getRule(ruleName string) *grammar.Rule {
+	for _, grammar := range g.grammars {
+		if rule := grammar.GetRule(ruleName); rule != nil {
+			return rule
+		}
+	}
+	return nil
+}
+
+// generateQuery creates a single query using grammar rules
+func (g *Generator) generateQuery(index int) string {
+	// Start generation from the specified start rule
+	result := g.generateFromRule(g.config.StartRule, 0)
+	return result
+}
+
+// generateFromRule recursively generates text from a grammar rule
+func (g *Generator) generateFromRule(ruleName string, currentDepth int) string {
+	// Check depth limit to prevent infinite recursion
+	if currentDepth >= g.config.MaxDepth {
+		return g.generateTerminal(ruleName)
+	}
+
+	// Get the rule
+	rule := g.getRule(ruleName)
+	if rule == nil {
+		// If rule not found, return placeholder
+		return fmt.Sprintf("<%s>", ruleName)
+	}
+
+	// Select a random alternative
+	if len(rule.Alternatives) == 0 {
+		return fmt.Sprintf("<%s>", ruleName)
+	}
+	
+	altIndex := g.random.Intn(len(rule.Alternatives))
+	alternative := rule.Alternatives[altIndex]
+
+	// Generate from all elements in the alternative
+	var result []string
+	for _, element := range alternative.Elements {
+		elementResult := g.generateFromElement(&element, currentDepth)
+		if elementResult != "" {
+			result = append(result, elementResult)
+		}
+	}
+
+	return fmt.Sprintf("/* %s */ %s", ruleName, joinWithSpaces(result))
+}
+
+// generateFromElement generates text from a single grammar element
+func (g *Generator) generateFromElement(element *grammar.Element, currentDepth int) string {
+	// Handle optional elements
+	if element.IsOptional() && g.random.Float64() > g.config.OptionalProb {
+		return "" // Skip optional element
+	}
+
+	// Handle quantified elements
+	if element.IsQuantified() {
+		return g.generateQuantified(element, currentDepth)
+	}
+
+	// Generate single element
+	if element.IsRule() {
+		return g.generateFromRule(element.Value, currentDepth+1)
+	} else if element.IsTerminal() {
+		return cleanLiteral(element.Value)
+	}
+
+	return element.Value
+}
+
+// generateQuantified handles quantified elements (* + {n,m})
+func (g *Generator) generateQuantified(element *grammar.Element, currentDepth int) string {
+	var count int
+	
+	switch element.Quantifier {
+	case grammar.ZERO_MORE: // *
+		count = g.random.Intn(g.config.MaxQuantifier + 1) // 0 to MaxQuantifier
+	case grammar.ONE_MORE: // +
+		count = 1 + g.random.Intn(g.config.MaxQuantifier) // 1 to MaxQuantifier
+	case grammar.RANGE: // {n,m}
+		if g.config.QuantifierCount > 0 {
+			count = g.config.QuantifierCount
+		} else {
+			rangeSize := element.Max - element.Min + 1
+			count = element.Min + g.random.Intn(rangeSize)
+		}
+	default:
+		count = 1
+	}
+
+	var results []string
+	for i := 0; i < count; i++ {
+		if element.IsRule() {
+			result := g.generateFromRule(element.Value, currentDepth+1)
+			results = append(results, result)
+		} else if element.IsTerminal() {
+			results = append(results, cleanLiteral(element.Value))
+		}
+	}
+
+	return joinWithSpaces(results)
+}
+
+// generateTerminal generates a terminal when depth limit is reached
+func (g *Generator) generateTerminal(ruleName string) string {
+	// For depth-limited cases, return a simple placeholder
+	return fmt.Sprintf("<%s_TERM>", ruleName)
+}
+
+// cleanLiteral removes quotes from literal strings
+func cleanLiteral(literal string) string {
+	// Remove single quotes from literals like 'SELECT'
+	if len(literal) >= 2 && literal[0] == '\'' && literal[len(literal)-1] == '\'' {
+		return literal[1 : len(literal)-1]
+	}
+	return literal
+}
+
+// joinWithSpaces joins strings with spaces, skipping empty strings
+func joinWithSpaces(strs []string) string {
+	var nonEmpty []string
+	for _, s := range strs {
+		if s != "" {
+			nonEmpty = append(nonEmpty, s)
+		}
+	}
+	if len(nonEmpty) == 0 {
+		return ""
+	}
+	return joinStrings(nonEmpty, " ")
+}
+
+// joinStrings joins strings with a separator
+func joinStrings(strs []string, sep string) string {
+	if len(strs) == 0 {
+		return ""
+	}
+	if len(strs) == 1 {
+		return strs[0]
+	}
+	
+	result := strs[0]
+	for i := 1; i < len(strs); i++ {
+		result += sep + strs[i]
+	}
+	return result
+}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/grammar/discovery.go b/tools/fuzzing/internal/grammar/discovery.go
new file mode 100644
index 0000000..9aa2f51
--- /dev/null
+++ b/tools/fuzzing/internal/grammar/discovery.go
@@ -0,0 +1,187 @@
+package grammar
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+// GrammarFiles represents a pair of lexer and parser grammar files
+type GrammarFiles struct {
+	LexerFile  string
+	ParserFile string
+	Directory  string
+}
+
+// DiscoverGrammarFiles finds lexer and parser files for a given grammar name
+func DiscoverGrammarFiles(grammarName string) (*GrammarFiles, error) {
+	// Start from fuzzing directory, go up to parser root
+	currentDir, err := os.Getwd()
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to get current directory")
+	}
+	
+	// Navigate to parser root (assuming we're in tools/fuzzing)
+	repoRoot := filepath.Join(currentDir, "..", "..")
+	
+	// Try different grammar directory patterns
+	grammarDirs := []string{
+		filepath.Join(repoRoot, grammarName),                    // Direct: postgresql/, cql/
+		filepath.Join(repoRoot, "tools", "grammar"),             // ANTLR v4 self-grammar
+		filepath.Join(repoRoot, "grammars", grammarName),        // Alternative structure
+	}
+	
+	for _, dir := range grammarDirs {
+		if files, err := findGrammarFilesInDir(dir, grammarName); err == nil {
+			return files, nil
+		}
+	}
+	
+	return nil, errors.Errorf("grammar '%s' not found in any of the expected locations", grammarName)
+}
+
+// findGrammarFilesInDir searches for grammar files in a specific directory
+func findGrammarFilesInDir(dir, grammarName string) (*GrammarFiles, error) {
+	if _, err := os.Stat(dir); os.IsNotExist(err) {
+		return nil, errors.Errorf("directory does not exist: %s", dir)
+	}
+	
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to read directory %s", dir)
+	}
+	
+	var lexerFile, parserFile string
+	
+	// Look for grammar files using different naming patterns
+	patterns := []struct {
+		lexerPattern  string
+		parserPattern string
+	}{
+		// First letter upper-cased, rest unchanged: cql -> CqlLexer.g4, CqlParser.g4
+		{fmt.Sprintf("%sLexer.g4", capitalize(grammarName)), fmt.Sprintf("%sParser.g4", capitalize(grammarName))},
+		// All upper case: cql -> CQLLexer.g4, CQLParser.g4
+		{fmt.Sprintf("%sLexer.g4", strings.ToUpper(grammarName)), fmt.Sprintf("%sParser.g4", strings.ToUpper(grammarName))},
+		// Title-cased patterns (strings.Title): cql -> CqlLexer.g4, CqlParser.g4
+		{fmt.Sprintf("%sLexer.g4", strings.Title(grammarName)), fmt.Sprintf("%sParser.g4", strings.Title(grammarName))},
+		// Lowercase patterns: postgresql_lexer.g4, postgresql_parser.g4
+		{fmt.Sprintf("%s_lexer.g4", strings.ToLower(grammarName)), fmt.Sprintf("%s_parser.g4", strings.ToLower(grammarName))},
+	}
+	
+	// Special cases for known grammar naming conventions
+	switch strings.ToLower(grammarName) {
+	case "postgresql":
+		patterns = append(patterns, struct {
+			lexerPattern  string
+			parserPattern string
+		}{"PostgreSQLLexer.g4", "PostgreSQLParser.g4"})
+	case "antlrv4":
+		patterns = append(patterns, struct {
+			lexerPattern  string
+			parserPattern string
+		}{"ANTLRv4Lexer.g4", "ANTLRv4Parser.g4"})
+	}
+	
+	// Special case for ANTLR v4 self-grammar directory  
+	if strings.Contains(dir, "tools/grammar") {
+		patterns = append(patterns, struct {
+			lexerPattern  string
+			parserPattern string
+		}{"ANTLRv4Lexer.g4", "ANTLRv4Parser.g4"})
+	}
+	
+	for _, entry := range entries {
+		if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".g4") {
+			for _, pattern := range patterns {
+				if entry.Name() == pattern.lexerPattern {
+					lexerFile = filepath.Join(dir, entry.Name())
+				}
+				if entry.Name() == pattern.parserPattern {
+					parserFile = filepath.Join(dir, entry.Name())
+				}
+			}
+		}
+	}
+	
+	// Check if we found both files
+	if lexerFile == "" {
+		return nil, errors.Errorf("lexer file not found in %s", dir)
+	}
+	if parserFile == "" {
+		return nil, errors.Errorf("parser file not found in %s", dir)
+	}
+	
+	return &GrammarFiles{
+		LexerFile:  lexerFile,
+		ParserFile: parserFile,
+		Directory:  dir,
+	}, nil
+}
+
+// ListAvailableGrammars scans for all available grammar directories
+func ListAvailableGrammars() ([]string, error) {
+	currentDir, err := os.Getwd()
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to get current directory")
+	}
+	
+	repoRoot := filepath.Join(currentDir, "..", "..")
+	
+	var grammars []string
+	
+	// Scan for grammar directories
+	entries, err := os.ReadDir(repoRoot)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to read repository root")
+	}
+	
+	for _, entry := range entries {
+		if entry.IsDir() {
+			dirPath := filepath.Join(repoRoot, entry.Name())
+			if hasGrammarFiles(dirPath) {
+				grammars = append(grammars, entry.Name())
+			}
+		}
+	}
+	
+	// Add special case for ANTLR v4 self-grammar
+	if hasGrammarFiles(filepath.Join(repoRoot, "tools", "grammar")) {
+		grammars = append(grammars, "antlrv4")
+	}
+	
+	return grammars, nil
+}
+
+// hasGrammarFiles checks if a directory contains both a lexer and a parser .g4 file (matched by file name)
+func hasGrammarFiles(dir string) bool {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return false
+	}
+	
+	var hasLexer, hasParser bool
+	for _, entry := range entries {
+		if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".g4") {
+			name := strings.ToLower(entry.Name())
+			if strings.Contains(name, "lexer") {
+				hasLexer = true
+			}
+			if strings.Contains(name, "parser") {
+				hasParser = true
+			}
+		}
+	}
+	
+	return hasLexer && hasParser
+}
+
+// capitalize capitalizes the first letter of a string, preserving the rest
+func capitalize(s string) string {
+	if len(s) == 0 {
+		return s
+	}
+	return strings.ToUpper(s[:1]) + s[1:]
+}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go
new file mode 100644
index 0000000..34cc05b
--- /dev/null
+++ b/tools/fuzzing/internal/grammar/parser.go
@@ -0,0 +1,259 @@
+package grammar
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/antlr4-go/antlr/v4"
+	"github.com/pkg/errors"
+	grammar "github.com/bytebase/parser/tools/grammar"
+)
+
+// ParsedGrammar represents a parsed grammar with extracted rules
+type ParsedGrammar struct {
+	LexerRules  map[string]*Rule
+	ParserRules map[string]*Rule
+	FilePath    string
+}
+
+// Rule represents a grammar rule with its alternatives
+type Rule struct {
+	Name         string
+	Alternatives []Alternative
+	IsLexer      bool
+}
+
+// Alternative represents one alternative of a rule
+type Alternative struct {
+	Elements []Element
+}
+
+// Element represents an element within an alternative
+type Element struct {
+	Type       ElementType
+	Value      string
+	Quantifier Quantifier
+	Min, Max   int // for {n,m} quantifiers
+}
+
+// ElementType indicates the type of grammar element
+type ElementType int
+
+const (
+	RULE_REF ElementType = iota
+	TOKEN_REF
+	LITERAL
+	OPTIONAL
+	QUANTIFIED
+)
+
+// Quantifier indicates repetition type
+type Quantifier int
+
+const (
+	NONE Quantifier = iota
+	OPTIONAL_Q // ?
+	ZERO_MORE  // *
+	ONE_MORE   // +
+	RANGE      // {n,m}
+)
+
+// ParseGrammarFile parses a .g4 file and extracts rules for fuzzing
+func ParseGrammarFile(filePath string) (*ParsedGrammar, error) {
+	// Read file content
+	content, err := os.ReadFile(filePath)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to read grammar file")
+	}
+
+	if len(content) == 0 {
+		return nil, errors.New("grammar file is empty")
+	}
+
+	// Create input stream
+	input := antlr.NewInputStream(string(content))
+
+	// Create lexer
+	lexer := grammar.NewANTLRv4Lexer(input)
+
+	// Add error listener
+	errorListener := &GrammarErrorListener{}
+	lexer.RemoveErrorListeners()
+	lexer.AddErrorListener(errorListener)
+
+	// Create token stream
+	stream := antlr.NewCommonTokenStream(lexer, 0)
+
+	// Create parser
+	parser := grammar.NewANTLRv4Parser(stream)
+
+	// Add error listener to parser
+	parser.RemoveErrorListeners()
+	parser.AddErrorListener(errorListener)
+
+	// Parse the grammar
+	tree := parser.GrammarSpec()
+
+	// Check for parsing errors
+	if errorListener.HasErrors() {
+		return nil, errors.Errorf("failed to parse grammar: %v", errorListener.GetErrors())
+	}
+
+	if tree == nil {
+		return nil, errors.New("parser returned nil tree")
+	}
+
+	// Extract rules from parse tree
+	visitor := &GrammarExtractorVisitor{
+		lexerRules:  make(map[string]*Rule),
+		parserRules: make(map[string]*Rule),
+	}
+
+	visitor.Visit(tree)
+
+	return &ParsedGrammar{
+		LexerRules:  visitor.lexerRules,
+		ParserRules: visitor.parserRules,
+		FilePath:    filePath,
+	}, nil
+}
+
+// GetRule gets a rule by name from either lexer or parser rules
+func (g *ParsedGrammar) GetRule(name string) *Rule {
+	if rule, ok := g.ParserRules[name]; ok {
+		return rule
+	}
+	if rule, ok := g.LexerRules[name]; ok {
+		return rule
+	}
+	return nil
+}
+
+// GetAllRules returns all rules (both lexer and parser)
+func (g *ParsedGrammar) GetAllRules() map[string]*Rule {
+	allRules := make(map[string]*Rule)
+	for name, rule := range g.LexerRules {
+		allRules[name] = rule
+	}
+	for name, rule := range g.ParserRules {
+		allRules[name] = rule
+	}
+	return allRules
+}
+
+// IsRule checks if an element refers to another rule
+func (e *Element) IsRule() bool {
+	return e.Type == RULE_REF || e.Type == TOKEN_REF
+}
+
+// IsTerminal checks if an element is a terminal (literal)
+func (e *Element) IsTerminal() bool {
+	return e.Type == LITERAL
+}
+
+// IsOptional checks if an element has optional quantifier
+func (e *Element) IsOptional() bool {
+	return e.Quantifier == OPTIONAL_Q
+}
+
+// IsQuantified checks if an element has repetition quantifiers
+func (e *Element) IsQuantified() bool {
+	return e.Quantifier == ZERO_MORE || e.Quantifier == ONE_MORE || e.Quantifier == RANGE
+}
+
+// GrammarErrorListener collects parsing errors
+type GrammarErrorListener struct {
+	errors []string
+}
+
+func (l *GrammarErrorListener) SyntaxError(recognizer antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) {
+	l.errors = append(l.errors, fmt.Sprintf("line %d:%d %s", line, column, msg))
+}
+
+func (l *GrammarErrorListener) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs *antlr.ATNConfigSet) {
+	// Ignore ambiguity for fuzzing purposes
+}
+
+func (l *GrammarErrorListener) ReportAttemptingFullContext(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, conflictingAlts *antlr.BitSet, configs *antlr.ATNConfigSet) {
+	// Ignore for fuzzing purposes
+}
+
+func (l *GrammarErrorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex, prediction int, configs *antlr.ATNConfigSet) {
+	// Ignore for fuzzing purposes
+}
+
+func (l *GrammarErrorListener) HasErrors() bool {
+	return len(l.errors) > 0
+}
+
+func (l *GrammarErrorListener) GetErrors() []string {
+	return l.errors
+}
+
+// GrammarExtractorVisitor extracts rules from the parse tree
+type GrammarExtractorVisitor struct {
+	antlr.ParseTreeVisitor
+	lexerRules  map[string]*Rule
+	parserRules map[string]*Rule
+	isLexer     bool
+}
+
+func (v *GrammarExtractorVisitor) Visit(tree antlr.ParseTree) interface{} {
+	// TODO: Implement tree visiting to extract rules
+	// This is a placeholder - we'll implement the actual visitor logic
+	// to walk the parse tree and extract rule information
+	
+	// For now, let's create a simple placeholder structure
+	v.extractPlaceholderRules()
+	
+	return nil
+}
+
+// extractPlaceholderRules creates placeholder rules for testing
+func (v *GrammarExtractorVisitor) extractPlaceholderRules() {
+	// Add some basic rules for testing
+	v.parserRules["selectStmt"] = &Rule{
+		Name:    "selectStmt",
+		IsLexer: false,
+		Alternatives: []Alternative{
+			{
+				Elements: []Element{
+					{Type: LITERAL, Value: "SELECT"},
+					{Type: RULE_REF, Value: "columnList"},
+					{Type: LITERAL, Value: "FROM"},
+					{Type: RULE_REF, Value: "tableRef"},
+					{Type: RULE_REF, Value: "whereClause", Quantifier: OPTIONAL_Q},
+				},
+			},
+		},
+	}
+	
+	v.parserRules["columnList"] = &Rule{
+		Name:    "columnList",
+		IsLexer: false,
+		Alternatives: []Alternative{
+			{
+				Elements: []Element{
+					{Type: RULE_REF, Value: "column"},
+					{
+						Type:       RULE_REF,
+						Value:      "column",
+						Quantifier: ZERO_MORE,
+					},
+				},
+			},
+		},
+	}
+
+	v.lexerRules["SELECT"] = &Rule{
+		Name:    "SELECT",
+		IsLexer: true,
+		Alternatives: []Alternative{
+			{
+				Elements: []Element{
+					{Type: LITERAL, Value: "'SELECT'"},
+				},
+			},
+		},
+	}
+}
\ No newline at end of file

From 32bae30217f4e57a7a953a07bde212b39ca61c20 Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Tue, 26 Aug 2025 14:45:17 +0800
Subject: [PATCH 2/9] fix: remove range

---
 tools/fuzzing/DESIGN.md                       | 22 ++--------------
 tools/fuzzing/README.md                       |  2 +-
 tools/fuzzing/internal/generator/generator.go | 26 +++++++++----------
 tools/fuzzing/internal/grammar/parser.go      |  4 +--
 4 files changed, 16 insertions(+), 38 deletions(-)

diff --git a/tools/fuzzing/DESIGN.md b/tools/fuzzing/DESIGN.md
index e78484f..5d05ad3 100644
--- a/tools/fuzzing/DESIGN.md
+++ b/tools/fuzzing/DESIGN.md
@@ -166,15 +166,7 @@ selectList: column (',' column)*  // Generate 1 to N columns
 identifier: LETTER (LETTER | DIGIT)+  // Generate 1 to N characters
 ```
 
-**Exact Count (`rule{n}`):**
-```antlr
-hexDigit: HEX_DIGIT{4}         // Generate exactly 4 hex digits
-```
-
-**Range Count (`rule{min,max}`):**
-```antlr
-varchar: CHAR{1,255}           // Generate 1 to 255 characters
-```
+**Note**: ANTLR v4 does not support `{n}` or `{n,m}` quantifier syntax. These are regex-style quantifiers not supported in ANTLR grammar files.
 
 #### Quantifier Control Strategy
 
@@ -218,17 +210,7 @@ func (g *Generator) generateQuantified(element *GrammarElement, config Quantifie
         max := min(config.MaxRepeat, 50)
         count = g.selectCount(min, max, config.Strategy)
         
-    case "{n}": // Exact count
-        if config.Strategy == "fixed" {
-            count = config.FixedCount
-        } else {
-            count = element.ExactCount
-        }
-        
-    case "{min,max}": // Range
-        min := max(element.MinCount, config.MinRepeat)
-        max := min(element.MaxCount, config.MaxRepeat)
-        count = g.selectCount(min, max, config.Strategy)
+    // Note: ANTLR v4 does not support {n} or {min,max} syntax
     }
     
     result := ""
diff --git a/tools/fuzzing/README.md b/tools/fuzzing/README.md
index 64f4409..f496ff1 100644
--- a/tools/fuzzing/README.md
+++ b/tools/fuzzing/README.md
@@ -44,7 +44,7 @@ tools/fuzzing/
 | `--count` | Number of queries to generate | 10 |
 | `--max-depth` | Maximum recursion depth | 5 |
 | `--optional-prob` | Probability of optional elements (0.0-1.0) | 0.5 |
-| `--max-quantifier` | Maximum count for `*` and `+` rules | 5 |
+| `--max-quantifier` | Maximum count for `*` and `+` quantifiers | 5 |
 | `--min-quantifier` | Minimum count override | 0 |
 | `--quantifier-count` | Fixed count for all quantifiers | 0 |
 | `--output` | Output file path | stdout |
diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go
index 648cd77..d8eed47 100644
--- a/tools/fuzzing/internal/generator/generator.go
+++ b/tools/fuzzing/internal/generator/generator.go
@@ -138,24 +138,22 @@ func (g *Generator) generateFromElement(element *grammar.Element, currentDepth i
 	return element.Value
 }
 
-// generateQuantified handles quantified elements (* + {n,m})
+// generateQuantified handles quantified elements (* +)
 func (g *Generator) generateQuantified(element *grammar.Element, currentDepth int) string {
 	var count int
 	
-	switch element.Quantifier {
-	case grammar.ZERO_MORE: // *
-		count = g.random.Intn(g.config.MaxQuantifier + 1) // 0 to MaxQuantifier
-	case grammar.ONE_MORE: // +
-		count = 1 + g.random.Intn(g.config.MaxQuantifier) // 1 to MaxQuantifier
-	case grammar.RANGE: // {n,m}
-		if g.config.QuantifierCount > 0 {
-			count = g.config.QuantifierCount
-		} else {
-			rangeSize := element.Max - element.Min + 1
-			count = element.Min + g.random.Intn(rangeSize)
+	// Use fixed count if specified, otherwise use random count
+	if g.config.QuantifierCount > 0 {
+		count = g.config.QuantifierCount
+	} else {
+		switch element.Quantifier {
+		case grammar.ZERO_MORE: // *
+			count = g.random.Intn(g.config.MaxQuantifier + 1) // 0 to MaxQuantifier
+		case grammar.ONE_MORE: // +
+			count = 1 + g.random.Intn(g.config.MaxQuantifier) // 1 to MaxQuantifier
+		default:
+			count = 1
 		}
-	default:
-		count = 1
 	}
 
 	var results []string
diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go
index 34cc05b..124cea0 100644
--- a/tools/fuzzing/internal/grammar/parser.go
+++ b/tools/fuzzing/internal/grammar/parser.go
@@ -33,7 +33,6 @@ type Element struct {
 	Type       ElementType
 	Value      string
 	Quantifier Quantifier
-	Min, Max   int // for {n,m} quantifiers
 }
 
 // ElementType indicates the type of grammar element
@@ -55,7 +54,6 @@ const (
 	OPTIONAL_Q // ?
 	ZERO_MORE  // *
 	ONE_MORE   // +
-	RANGE      // {n,m}
 )
 
 // ParseGrammarFile parses a .g4 file and extracts rules for fuzzing
@@ -158,7 +156,7 @@ func (e *Element) IsOptional() bool {
 
 // IsQuantified checks if an element has repetition quantifiers
 func (e *Element) IsQuantified() bool {
-	return e.Quantifier == ZERO_MORE || e.Quantifier == ONE_MORE || e.Quantifier == RANGE
+	return e.Quantifier == ZERO_MORE || e.Quantifier == ONE_MORE
 }
 
 // GrammarErrorListener collects parsing errors

From a3c23e6b49c4c395cdbf884d11aaf813faba50a1 Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Thu, 28 Aug 2025 10:29:54 +0800
Subject: [PATCH 3/9] feat: parse grammar IR

---
 tools/fuzzing/internal/generator/generator.go |  54 ++-
 tools/fuzzing/internal/grammar/parser.go      | 443 +++++++++++++++---
 tools/fuzzing/internal/grammar/parser_test.go | 219 +++++++++
 3 files changed, 635 insertions(+), 81 deletions(-)
 create mode 100644 tools/fuzzing/internal/grammar/parser_test.go

diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go
index d8eed47..207c677 100644
--- a/tools/fuzzing/internal/generator/generator.go
+++ b/tools/fuzzing/internal/generator/generator.go
@@ -130,12 +130,20 @@ func (g *Generator) generateFromElement(element *grammar.Element, currentDepth i
 
 	// Generate single element
 	if element.IsRule() {
-		return g.generateFromRule(element.Value, currentDepth+1)
+		if refValue, ok := element.Value.(grammar.ReferenceValue); ok {
+			return g.generateFromRule(refValue.Name, currentDepth+1)
+		} else if blockValue, ok := element.Value.(grammar.BlockValue); ok {
+			return g.generateFromBlock(blockValue, currentDepth)
+		}
+		return g.generateFromRule(element.Value.String(), currentDepth+1)
 	} else if element.IsTerminal() {
-		return cleanLiteral(element.Value)
+		if litValue, ok := element.Value.(grammar.LiteralValue); ok {
+			return cleanLiteral(litValue.Text)
+		}
+		return cleanLiteral(element.Value.String())
 	}
 
-	return element.Value
+	return element.Value.String()
 }
 
 // generateQuantified handles quantified elements (* +)
@@ -159,16 +167,50 @@ func (g *Generator) generateQuantified(element *grammar.Element, currentDepth in
 	var results []string
 	for i := 0; i < count; i++ {
 		if element.IsRule() {
-			result := g.generateFromRule(element.Value, currentDepth+1)
-			results = append(results, result)
+			if refValue, ok := element.Value.(grammar.ReferenceValue); ok {
+				result := g.generateFromRule(refValue.Name, currentDepth+1)
+				results = append(results, result)
+			} else {
+				result := g.generateFromRule(element.Value.String(), currentDepth+1)
+				results = append(results, result)
+			}
 		} else if element.IsTerminal() {
-			results = append(results, cleanLiteral(element.Value))
+			if litValue, ok := element.Value.(grammar.LiteralValue); ok {
+				results = append(results, cleanLiteral(litValue.Text))
+			} else {
+				results = append(results, cleanLiteral(element.Value.String()))
+			}
+		} else if blockValue, ok := element.Value.(grammar.BlockValue); ok {
+			result := g.generateFromBlock(blockValue, currentDepth+1)
+			results = append(results, result)
 		}
 	}
 
 	return joinWithSpaces(results)
 }
 
+// generateFromBlock generates content from a block value
+func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, currentDepth int) string {
+	if len(blockValue.Alternatives) == 0 {
+		return ""
+	}
+
+	// Select a random alternative from the block
+	altIndex := g.random.Intn(len(blockValue.Alternatives))
+	alternative := blockValue.Alternatives[altIndex]
+
+	// Generate from all elements in the selected alternative
+	var result []string
+	for _, element := range alternative.Elements {
+		elementResult := g.generateFromElement(&element, currentDepth)
+		if elementResult != "" {
+			result = append(result, elementResult)
+		}
+	}
+
+	return joinWithSpaces(result)
+}
+
 // generateTerminal generates a terminal when depth limit is reached
 func (g *Generator) generateTerminal(ruleName string) string {
 	// For depth-limited cases, return a simple placeholder
diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go
index 124cea0..0e24435 100644
--- a/tools/fuzzing/internal/grammar/parser.go
+++ b/tools/fuzzing/internal/grammar/parser.go
@@ -3,6 +3,7 @@ package grammar
 import (
 	"fmt"
 	"os"
+	"strings"
 
 	"github.com/antlr4-go/antlr/v4"
 	"github.com/pkg/errors"
@@ -14,6 +15,9 @@ type ParsedGrammar struct {
 	LexerRules  map[string]*Rule
 	ParserRules map[string]*Rule
 	FilePath    string
+	// BlockAltMap stores temporary block rules for debugging
+	// Key: block ID (e.g., "block_1_alts"), Value: the block alternatives
+	BlockAltMap map[string][]Alternative
 }
 
 // Rule represents a grammar rule with its alternatives
@@ -28,24 +32,61 @@ type Alternative struct {
 	Elements []Element
 }
 
+// Global block ID counter for generating unique block names
+var globalBlockID = 0
+
+// ElementValue represents different types of element values
+type ElementValue interface {
+	// String returns a string representation for display/debugging
+	String() string
+}
+
+// LiteralValue represents a literal string (e.g., 'SELECT')
+type LiteralValue struct {
+	Text string
+}
+
+func (l LiteralValue) String() string { return l.Text }
+
+// ReferenceValue represents a reference to a rule or token (e.g., IDENTIFIER, selectStmt)
+type ReferenceValue struct {
+	Name string
+}
+
+func (r ReferenceValue) String() string { return r.Name }
+
+// BlockValue represents a generated block (e.g., (',' column)*)
+type BlockValue struct {
+	ID           string        // Global unique ID like "block_1_alts"
+	Alternatives []Alternative
+}
+
+func (b BlockValue) String() string {
+	if len(b.Alternatives) == 0 {
+		return "<empty_block>"
+	}
+	if len(b.Alternatives) == 1 {
+		elements := []string{}
+		for _, elem := range b.Alternatives[0].Elements {
+			elements = append(elements, elem.Value.String())
+		}
+		return fmt.Sprintf("(%s)", strings.Join(elements, " "))
+	}
+	return b.ID
+}
+
+
+// WildcardValue represents a wildcard (.)
+type WildcardValue struct{}
+
+func (w WildcardValue) String() string { return "." }
+
 // Element represents an element within an alternative
 type Element struct {
-	Type       ElementType
-	Value      string
+	Value      ElementValue
 	Quantifier Quantifier
 }
 
-// ElementType indicates the type of grammar element
-type ElementType int
-
-const (
-	RULE_REF ElementType = iota
-	TOKEN_REF
-	LITERAL
-	OPTIONAL
-	QUANTIFIED
-)
-
 // Quantifier indicates repetition type
 type Quantifier int
 
@@ -102,17 +143,16 @@ func ParseGrammarFile(filePath string) (*ParsedGrammar, error) {
 	}
 
 	// Extract rules from parse tree
-	visitor := &GrammarExtractorVisitor{
-		lexerRules:  make(map[string]*Rule),
-		parserRules: make(map[string]*Rule),
-	}
+	visitor := NewGrammarExtractorVisitor()
+	visitor.VisitGrammarSpec(tree)
+
 
-	visitor.Visit(tree)
 
 	return &ParsedGrammar{
 		LexerRules:  visitor.lexerRules,
 		ParserRules: visitor.parserRules,
 		FilePath:    filePath,
+		BlockAltMap: visitor.blockAltMap,
 	}, nil
 }
 
@@ -139,14 +179,30 @@ func (g *ParsedGrammar) GetAllRules() map[string]*Rule {
 	return allRules
 }
 
-// IsRule checks if an element refers to another rule
+// GetBlockAlternatives returns the alternatives for a generated block ID
+func (g *ParsedGrammar) GetBlockAlternatives(blockID string) ([]Alternative, bool) {
+	alts, exists := g.BlockAltMap[blockID]
+	return alts, exists
+}
+
+// IsGeneratedBlock checks if a name refers to a generated block
+func (g *ParsedGrammar) IsGeneratedBlock(name string) bool {
+	_, exists := g.BlockAltMap[name]
+	return exists
+}
+
+// IsRule checks if an element refers to another rule or generated block
 func (e *Element) IsRule() bool {
-	return e.Type == RULE_REF || e.Type == TOKEN_REF
+	_, isRef := e.Value.(ReferenceValue)
+	_, isBlock := e.Value.(BlockValue)
+	return isRef || isBlock
 }
 
 // IsTerminal checks if an element is a terminal (literal)
 func (e *Element) IsTerminal() bool {
-	return e.Type == LITERAL
+	_, isLit := e.Value.(LiteralValue)
+	_, isWild := e.Value.(WildcardValue)
+	return isLit || isWild
 }
 
 // IsOptional checks if an element has optional quantifier
@@ -190,68 +246,305 @@ func (l *GrammarErrorListener) GetErrors() []string {
 
 // GrammarExtractorVisitor extracts rules from the parse tree
 type GrammarExtractorVisitor struct {
-	antlr.ParseTreeVisitor
+	*grammar.BaseANTLRv4ParserVisitor
 	lexerRules  map[string]*Rule
 	parserRules map[string]*Rule
-	isLexer     bool
+	blockAltMap map[string][]Alternative
 }
 
-func (v *GrammarExtractorVisitor) Visit(tree antlr.ParseTree) interface{} {
-	// TODO: Implement tree visiting to extract rules
-	// This is a placeholder - we'll implement the actual visitor logic
-	// to walk the parse tree and extract rule information
-	
-	// For now, let's create a simple placeholder structure
-	v.extractPlaceholderRules()
-	
+// NewGrammarExtractorVisitor creates a new visitor
+func NewGrammarExtractorVisitor() *GrammarExtractorVisitor {
+	v := &GrammarExtractorVisitor{
+		BaseANTLRv4ParserVisitor: &grammar.BaseANTLRv4ParserVisitor{},
+		lexerRules:               make(map[string]*Rule),
+		parserRules:              make(map[string]*Rule),
+		blockAltMap:              make(map[string][]Alternative),
+	}
+	return v
+}
+
+// VisitGrammarSpec visits the grammar specification
+func (v *GrammarExtractorVisitor) VisitGrammarSpec(ctx grammar.IGrammarSpecContext) interface{} {
+	// Visit rules section
+	if rulesCtx := ctx.Rules(); rulesCtx != nil {
+		v.VisitRules(rulesCtx)
+	}
+	return nil
+}
+
+// VisitRules visits the rules section
+func (v *GrammarExtractorVisitor) VisitRules(ctx grammar.IRulesContext) interface{} {
+	// Visit all rule specifications
+	for _, ruleSpecCtx := range ctx.AllRuleSpec() {
+		v.VisitRuleSpec(ruleSpecCtx)
+	}
+	return nil
+}
+
+// VisitRuleSpec visits a rule specification (could be parser or lexer rule)
+func (v *GrammarExtractorVisitor) VisitRuleSpec(ctx grammar.IRuleSpecContext) interface{} {
+	// Focus only on parser rules for now
+	if parserRuleCtx := ctx.ParserRuleSpec(); parserRuleCtx != nil {
+		v.VisitParserRuleSpec(parserRuleCtx)
+	}
+	// Skip lexer rules for now
+	return nil
+}
+
+// VisitParserRuleSpec visits a parser rule specification
+func (v *GrammarExtractorVisitor) VisitParserRuleSpec(ctx grammar.IParserRuleSpecContext) interface{} {
+	// Get rule name
+	ruleNameToken := ctx.RULE_REF()
+	if ruleNameToken == nil {
+		return nil
+	}
+	ruleName := ruleNameToken.GetText()
+
+	// Get rule block (alternatives)
+	ruleBlockCtx := ctx.RuleBlock()
+	if ruleBlockCtx == nil {
+		return nil
+	}
+
+	// Extract alternatives
+	alternatives := v.extractAlternatives(ruleBlockCtx)
+
+	// Create rule
+	rule := &Rule{
+		Name:         ruleName,
+		IsLexer:      false,
+		Alternatives: alternatives,
+	}
+
+	// Store rule
+	v.parserRules[ruleName] = rule
+
+	return nil
+}
+
+// extractAlternatives extracts alternatives from a rule block
+func (v *GrammarExtractorVisitor) extractAlternatives(ruleBlockCtx grammar.IRuleBlockContext) []Alternative {
+	var alternatives []Alternative
+
+	// Get rule alternative list
+	ruleAltListCtx := ruleBlockCtx.RuleAltList()
+	if ruleAltListCtx == nil {
+		return alternatives
+	}
+
+	// Process each labeled alternative
+	for _, labeledAltCtx := range ruleAltListCtx.AllLabeledAlt() {
+		alternative := v.extractAlternative(labeledAltCtx)
+		alternatives = append(alternatives, alternative)
+	}
+
+	return alternatives
+}
+
+// extractAlternative extracts a single alternative
+func (v *GrammarExtractorVisitor) extractAlternative(labeledAltCtx grammar.ILabeledAltContext) Alternative {
+	var elements []Element
+
+	// Get alternative context
+	altCtx := labeledAltCtx.Alternative()
+	if altCtx != nil {
+		// Process each element in the alternative
+		for _, elementCtx := range altCtx.AllElement() {
+			element := v.extractElement(elementCtx)
+			if element != nil {
+				elements = append(elements, *element)
+			}
+		}
+	}
+
+	return Alternative{
+		Elements: elements,
+	}
+}
+
+// extractElement extracts an element (labeled element, atom, or EBNF block) together with its quantifier
+func (v *GrammarExtractorVisitor) extractElement(elementCtx grammar.IElementContext) *Element {
+	// Handle labeled elements (e.g., ids+=ID*): apply the suffix only if present so an unwrapped block element keeps its quantifier
+	if labeledElementCtx := elementCtx.LabeledElement(); labeledElementCtx != nil {
+		element := v.extractLabeledElement(labeledElementCtx)
+		if suffixCtx := elementCtx.EbnfSuffix(); element != nil && suffixCtx != nil {
+			element.Quantifier = v.extractQuantifier(suffixCtx)
+		}
+		return element
+	}
+	// Handle atoms (terminals/non-terminals) and their optional quantifier
+	if atomCtx := elementCtx.Atom(); atomCtx != nil {
+		element := v.extractAtom(atomCtx)
+		if element != nil {
+			element.Quantifier = v.extractQuantifier(elementCtx.EbnfSuffix())
+		}
+		return element
+	}
+	// Handle EBNF constructs (blocks with quantifiers); extractEbnf reads the block suffix itself
+	if ebnfCtx := elementCtx.Ebnf(); ebnfCtx != nil {
+		return v.extractEbnf(ebnfCtx)
+	}
 	return nil
 }
 
-// extractPlaceholderRules creates placeholder rules for testing
-func (v *GrammarExtractorVisitor) extractPlaceholderRules() {
-	// Add some basic rules for testing
-	v.parserRules["selectStmt"] = &Rule{
-		Name:    "selectStmt",
-		IsLexer: false,
-		Alternatives: []Alternative{
-			{
-				Elements: []Element{
-					{Type: LITERAL, Value: "SELECT"},
-					{Type: RULE_REF, Value: "columnList"},
-					{Type: LITERAL, Value: "FROM"},
-					{Type: RULE_REF, Value: "tableRef"},
-					{Type: RULE_REF, Value: "whereClause", Quantifier: OPTIONAL_Q},
-				},
-			},
-		},
+// extractLabeledElement extracts a labeled element (e.g., label=atom)
+func (v *GrammarExtractorVisitor) extractLabeledElement(labeledElementCtx grammar.ILabeledElementContext) *Element {
+	// For now, just extract the atom part and ignore the label
+	if atomCtx := labeledElementCtx.Atom(); atomCtx != nil {
+		return v.extractAtom(atomCtx)
 	}
+	if blockCtx := labeledElementCtx.Block(); blockCtx != nil {
+		return v.extractBlock(blockCtx)
+	}
+	return nil
+}
+
+// extractAtom extracts an atom (terminal or non-terminal)
+func (v *GrammarExtractorVisitor) extractAtom(atomCtx grammar.IAtomContext) *Element {
+	// Handle terminal definition (string literal or token reference)
+	if terminalDefCtx := atomCtx.TerminalDef(); terminalDefCtx != nil {
+		return v.extractTerminalDef(terminalDefCtx)
+	}
+
+	// Handle rule reference
+	if rulerefCtx := atomCtx.Ruleref(); rulerefCtx != nil {
+		return v.extractRuleRef(rulerefCtx)
+	}
+
+	// Handle wildcard (.)
+	if wildcardCtx := atomCtx.Wildcard(); wildcardCtx != nil {
+		return &Element{
+			Value: WildcardValue{},
+		}
+	}
+
+	// Handle not sets, ranges, etc. - for now just return nil
+	return nil
+}
+
+// extractTerminalDef extracts a terminal definition (literal string or token reference)
+func (v *GrammarExtractorVisitor) extractTerminalDef(terminalDefCtx grammar.ITerminalDefContext) *Element {
+	if stringLiteralToken := terminalDefCtx.STRING_LITERAL(); stringLiteralToken != nil {
+		return &Element{
+			Value: LiteralValue{Text: stringLiteralToken.GetText()},
+		}
+	}
+	if tokenRefToken := terminalDefCtx.TOKEN_REF(); tokenRefToken != nil {
+		return &Element{
+			Value: ReferenceValue{Name: tokenRefToken.GetText()},
+		}
+	}
+	return nil
+}
+
+
+// extractRuleRef extracts a rule reference
+func (v *GrammarExtractorVisitor) extractRuleRef(rulerefCtx grammar.IRulerefContext) *Element {
+	if ruleRefToken := rulerefCtx.RULE_REF(); ruleRefToken != nil {
+		return &Element{
+			Value: ReferenceValue{Name: ruleRefToken.GetText()},
+		}
+	}
+	return nil
+}
+
+// extractBlock extracts a block (grouped alternatives) as a BlockValue element, or as its lone unquantified inner element
+func (v *GrammarExtractorVisitor) extractBlock(blockCtx grammar.IBlockContext) *Element {
+	// Get the alternative list from the block
+	altListCtx := blockCtx.AltList()
+	if altListCtx == nil {
+		globalBlockID++
+		blockID := fmt.Sprintf("block_%d_alts", globalBlockID)
+		emptyAlts := []Alternative{}
+		v.blockAltMap[blockID] = emptyAlts
+		
+		return &Element{
+			Value: BlockValue{ID: blockID, Alternatives: emptyAlts},
+		}
+	}
+
+	// Extract all alternatives from the block
+	alts := altListCtx.AllAlternative()
+	if len(alts) == 0 {
+		globalBlockID++
+		blockID := fmt.Sprintf("block_%d_alts", globalBlockID)
+		emptyAlts := []Alternative{}
+		v.blockAltMap[blockID] = emptyAlts
+		
+		return &Element{
+			Value: BlockValue{ID: blockID, Alternatives: emptyAlts},
+		}
+	}
+
+	// Extract all alternatives
+	blockAlternatives := []Alternative{}
+	for _, altCtx := range alts {
+		elements := []Element{}
+		for _, elementCtx := range altCtx.AllElement() {
+			element := v.extractElement(elementCtx)
+			if element != nil {
+				elements = append(elements, *element)
+			}
+		}
+		blockAlternatives = append(blockAlternatives, Alternative{Elements: elements})
+	}
+
+	// Unwrap a lone element only if it has no quantifier of its own; a block suffix would otherwise overwrite it, e.g. (a+)?
+	if len(blockAlternatives) == 1 && len(blockAlternatives[0].Elements) == 1 && blockAlternatives[0].Elements[0].Quantifier == NONE {
+		return &blockAlternatives[0].Elements[0]
+	}
+	
+	// Generate global unique block ID and store mapping
+	globalBlockID++
+	blockID := fmt.Sprintf("block_%d_alts", globalBlockID)
+	v.blockAltMap[blockID] = blockAlternatives
 	
-	v.parserRules["columnList"] = &Rule{
-		Name:    "columnList",
-		IsLexer: false,
-		Alternatives: []Alternative{
-			{
-				Elements: []Element{
-					{Type: RULE_REF, Value: "column"},
-					{
-						Type:       RULE_REF,
-						Value:      "column",
-						Quantifier: ZERO_MORE,
-					},
-				},
-			},
-		},
-	}
-
-	v.lexerRules["SELECT"] = &Rule{
-		Name:    "SELECT",
-		IsLexer: true,
-		Alternatives: []Alternative{
-			{
-				Elements: []Element{
-					{Type: LITERAL, Value: "'SELECT'"},
-				},
-			},
-		},
+	return &Element{
+		Value: BlockValue{ID: blockID, Alternatives: blockAlternatives},
+	}
+}
+
+// extractEbnf extracts EBNF constructs (blocks with suffixes)
+func (v *GrammarExtractorVisitor) extractEbnf(ebnfCtx grammar.IEbnfContext) *Element {
+	// Get the block
+	blockCtx := ebnfCtx.Block()
+	if blockCtx == nil {
+		return nil
+	}
+
+	element := v.extractBlock(blockCtx)
+	if element != nil {
+		// Apply quantifier from block suffix
+		if blockSuffixCtx := ebnfCtx.BlockSuffix(); blockSuffixCtx != nil {
+			if ebnfSuffixCtx := blockSuffixCtx.EbnfSuffix(); ebnfSuffixCtx != nil {
+				element.Quantifier = v.extractQuantifier(ebnfSuffixCtx)
+			}
+		}
+	}
+
+	return element
+}
+
+// extractQuantifier maps an EBNF suffix (?, *, +, each optionally followed by a non-greedy '?') to a Quantifier; nil yields NONE
+func (v *GrammarExtractorVisitor) extractQuantifier(ebnfSuffixCtx grammar.IEbnfSuffixContext) Quantifier {
+	if ebnfSuffixCtx == nil {
+		return NONE
 	}
+
+	// Check for star (zero or more) first: the non-greedy form "*?" also carries a QUESTION token
+	if ebnfSuffixCtx.STAR() != nil {
+		return ZERO_MORE
+	}
+
+	// Check for plus (one or more) next: the non-greedy form "+?" also carries a QUESTION token
+	if ebnfSuffixCtx.PLUS() != nil {
+		return ONE_MORE
+	}
+
+	// With star and plus ruled out, a question mark means optional ("?" or non-greedy "??")
+	if ebnfSuffixCtx.QUESTION(0) != nil {
+		return OPTIONAL_Q
+	}
+
+	return NONE
 }
\ No newline at end of file
diff --git a/tools/fuzzing/internal/grammar/parser_test.go b/tools/fuzzing/internal/grammar/parser_test.go
new file mode 100644
index 0000000..12ffa54
--- /dev/null
+++ b/tools/fuzzing/internal/grammar/parser_test.go
@@ -0,0 +1,219 @@
+package grammar
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestCompleteGrammarIR tests the complete intermediate representation of parsed grammar
+func TestCompleteGrammarIR(t *testing.T) {
+	grammarContent := `
+parser grammar CompleteIRTest;
+
+// Simple rule with literals
+greeting: 'Hello' 'World';
+
+// Rule with alternatives  
+statement: selectStmt | insertStmt | 'DELETE';
+
+// Rule with quantifiers and mixed elements
+selectStmt: 'SELECT' columnList 'FROM' IDENTIFIER whereClause?;
+
+// Rule with quantified elements
+columnList: column (',' column)*;
+
+// Rule with token reference
+column: IDENTIFIER ('AS' IDENTIFIER)?;
+
+// Rule with optional and alternatives
+whereClause: 'WHERE' expr;
+
+// Complex rule with multiple alternatives and quantifiers
+expr: expr '+' expr
+    | expr '*' expr  
+    | '(' expr ')'
+    | IDENTIFIER
+    | NUMBER;
+`
+
+	tmpFile := createTempGrammarFile(t, grammarContent)
+	defer os.Remove(tmpFile)
+
+	grammar, err := ParseGrammarFile(tmpFile)
+	if err != nil {
+		t.Fatalf("Failed to parse grammar: %v", err)
+	}
+
+	// Basic grammar properties
+	if grammar == nil {
+		t.Fatal("Grammar is nil")
+	}
+	if grammar.FilePath != tmpFile {
+		t.Errorf("Expected file path %s, got %s", tmpFile, grammar.FilePath)
+	}
+	if len(grammar.LexerRules) != 0 {
+		t.Errorf("Expected 0 lexer rules, got %d", len(grammar.LexerRules))
+	}
+	if len(grammar.ParserRules) != 7 {
+		t.Errorf("Expected 7 parser rules, got %d", len(grammar.ParserRules))
+	}
+
+	// Test cases for rule validation
+	tests := []struct {
+		ruleName     string
+		alternatives int
+		elements     []elementTest
+	}{
+		{
+			ruleName:     "greeting",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "'Hello'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 0, elementIndex: 1, value: "'World'", quantifier: NONE, elementType: "literal"},
+			},
+		},
+		{
+			ruleName:     "statement",
+			alternatives: 3,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "selectStmt", quantifier: NONE, elementType: "reference"},
+				{altIndex: 1, elementIndex: 0, value: "insertStmt", quantifier: NONE, elementType: "reference"},
+				{altIndex: 2, elementIndex: 0, value: "'DELETE'", quantifier: NONE, elementType: "literal"},
+			},
+		},
+		{
+			ruleName:     "selectStmt",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "'SELECT'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 0, elementIndex: 1, value: "columnList", quantifier: NONE, elementType: "reference"},
+				{altIndex: 0, elementIndex: 2, value: "'FROM'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 0, elementIndex: 3, value: "IDENTIFIER", quantifier: NONE, elementType: "reference"},
+				{altIndex: 0, elementIndex: 4, value: "whereClause", quantifier: OPTIONAL_Q, elementType: "reference"},
+			},
+		},
+		{
+			ruleName:     "columnList",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "column", quantifier: NONE, elementType: "reference"},
+				{altIndex: 0, elementIndex: 1, value: "(',' column)", quantifier: ZERO_MORE, elementType: "block"},
+			},
+		},
+		{
+			ruleName:     "column",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "IDENTIFIER", quantifier: NONE, elementType: "reference"},
+				{altIndex: 0, elementIndex: 1, value: "('AS' IDENTIFIER)", quantifier: OPTIONAL_Q, elementType: "block"},
+			},
+		},
+		{
+			ruleName:     "whereClause",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "'WHERE'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 0, elementIndex: 1, value: "expr", quantifier: NONE, elementType: "reference"},
+			},
+		},
+		{
+			ruleName:     "expr",
+			alternatives: 5,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "expr", quantifier: NONE, elementType: "reference"},
+				{altIndex: 0, elementIndex: 1, value: "'+'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 0, elementIndex: 2, value: "expr", quantifier: NONE, elementType: "reference"},
+				{altIndex: 1, elementIndex: 1, value: "'*'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 2, elementIndex: 0, value: "'('", quantifier: NONE, elementType: "literal"},
+				{altIndex: 2, elementIndex: 1, value: "expr", quantifier: NONE, elementType: "reference"},
+				{altIndex: 2, elementIndex: 2, value: "')'", quantifier: NONE, elementType: "literal"},
+				{altIndex: 3, elementIndex: 0, value: "IDENTIFIER", quantifier: NONE, elementType: "reference"},
+				{altIndex: 4, elementIndex: 0, value: "NUMBER", quantifier: NONE, elementType: "reference"},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.ruleName, func(t *testing.T) {
+			rule := grammar.GetRule(tc.ruleName)
+			if rule == nil {
+				t.Fatalf("rule %s not found", tc.ruleName)
+			}
+			if rule.Name != tc.ruleName || rule.IsLexer {
+				t.Errorf("rule %s has incorrect metadata", tc.ruleName)
+			}
+			if len(rule.Alternatives) != tc.alternatives {
+				t.Errorf("%s: expected %d alternatives, got %d", tc.ruleName, tc.alternatives, len(rule.Alternatives))
+			}
+
+			for _, elem := range tc.elements {
+				altIndex := elem.altIndex
+				elementIndex := elem.elementIndex
+
+				if altIndex >= len(rule.Alternatives) {
+					t.Errorf("%s: alternative %d out of range", tc.ruleName, altIndex)
+					continue
+				}
+
+				elements := rule.Alternatives[altIndex].Elements
+				if elementIndex >= len(elements) {
+					t.Errorf("%s alt %d: element %d out of range", tc.ruleName, altIndex, elementIndex)
+					continue
+				}
+
+				element := elements[elementIndex]
+				if elem.value != "" && element.Value.String() != elem.value {
+					t.Errorf("%s alt %d elem %d: expected value %s, got %s", tc.ruleName, altIndex, elementIndex, elem.value, element.Value.String())
+				}
+				if element.Quantifier != elem.quantifier {
+					t.Errorf("%s alt %d elem %d: expected quantifier %v, got %v", tc.ruleName, altIndex, elementIndex, elem.quantifier, element.Quantifier)
+				}
+				
+				// Validate element type using type assertions
+				switch elem.elementType {
+				case "literal":
+					if _, ok := element.Value.(LiteralValue); !ok {
+						t.Errorf("%s alt %d elem %d: expected LiteralValue, got %T", tc.ruleName, altIndex, elementIndex, element.Value)
+					}
+				case "reference":
+					if _, ok := element.Value.(ReferenceValue); !ok {
+						t.Errorf("%s alt %d elem %d: expected ReferenceValue, got %T", tc.ruleName, altIndex, elementIndex, element.Value)
+					}
+				case "block":
+					if _, ok := element.Value.(BlockValue); !ok {
+						t.Errorf("%s alt %d elem %d: expected BlockValue, got %T", tc.ruleName, altIndex, elementIndex, element.Value)
+					}
+				}
+			}
+		})
+	}
+
+	// Test GetAllRules method
+	allRules := grammar.GetAllRules()
+	if len(allRules) != 7 {
+		t.Errorf("GetAllRules: expected 7 rules, got %d", len(allRules))
+	}
+}
+
+type elementTest struct {
+	altIndex     int
+	elementIndex int
+	value        string
+	quantifier   Quantifier
+	elementType  string // "literal", "reference", or "block"
+}
+
+// Helper functions
+
+// createTempGrammarFile writes content to test_grammar.g4 in a fresh per-test directory and returns the file path.
+func createTempGrammarFile(t *testing.T, content string) string {
+	t.Helper()
+	// t.TempDir is unique per test and removed automatically, so concurrent runs cannot clobber a shared fixed path
+	tmpFile := filepath.Join(t.TempDir(), "test_grammar.g4")
+	if err := os.WriteFile(tmpFile, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to create temp grammar file: %v", err)
+	}
+
+	return tmpFile
+}
\ No newline at end of file

From 56efd1424bf7f578e5ffffb0579be657c427da22 Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Thu, 28 Aug 2025 10:51:51 +0800
Subject: [PATCH 4/9] feat: lexer parser v1

---
 tools/fuzzing/internal/grammar/discovery.go   | 187 ---------------
 tools/fuzzing/internal/grammar/parser.go      | 220 +++++++++++++++++-
 tools/fuzzing/internal/grammar/parser_test.go | 191 +++++++++++++++
 3 files changed, 409 insertions(+), 189 deletions(-)
 delete mode 100644 tools/fuzzing/internal/grammar/discovery.go

diff --git a/tools/fuzzing/internal/grammar/discovery.go b/tools/fuzzing/internal/grammar/discovery.go
deleted file mode 100644
index 9aa2f51..0000000
--- a/tools/fuzzing/internal/grammar/discovery.go
+++ /dev/null
@@ -1,187 +0,0 @@
-package grammar
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-
-	"github.com/pkg/errors"
-)
-
-// GrammarFiles represents a pair of lexer and parser grammar files
-type GrammarFiles struct {
-	LexerFile  string
-	ParserFile string
-	Directory  string
-}
-
-// DiscoverGrammarFiles finds lexer and parser files for a given grammar name
-func DiscoverGrammarFiles(grammarName string) (*GrammarFiles, error) {
-	// Start from fuzzing directory, go up to parser root
-	currentDir, err := os.Getwd()
-	if err != nil {
-		return nil, errors.Wrap(err, "failed to get current directory")
-	}
-	
-	// Navigate to parser root (assuming we're in tools/fuzzing)
-	repoRoot := filepath.Join(currentDir, "..", "..")
-	
-	// Try different grammar directory patterns
-	grammarDirs := []string{
-		filepath.Join(repoRoot, grammarName),                    // Direct: postgresql/, cql/
-		filepath.Join(repoRoot, "tools", "grammar"),             // ANTLR v4 self-grammar
-		filepath.Join(repoRoot, "grammars", grammarName),        // Alternative structure
-	}
-	
-	for _, dir := range grammarDirs {
-		if files, err := findGrammarFilesInDir(dir, grammarName); err == nil {
-			return files, nil
-		}
-	}
-	
-	return nil, errors.Errorf("grammar '%s' not found in any of the expected locations", grammarName)
-}
-
-// findGrammarFilesInDir searches for grammar files in a specific directory
-func findGrammarFilesInDir(dir, grammarName string) (*GrammarFiles, error) {
-	if _, err := os.Stat(dir); os.IsNotExist(err) {
-		return nil, errors.Errorf("directory does not exist: %s", dir)
-	}
-	
-	entries, err := os.ReadDir(dir)
-	if err != nil {
-		return nil, errors.Wrapf(err, "failed to read directory %s", dir)
-	}
-	
-	var lexerFile, parserFile string
-	
-	// Look for grammar files using different naming patterns
-	patterns := []struct {
-		lexerPattern  string
-		parserPattern string
-	}{
-		// Standard patterns: PostgreSQLLexer.g4, PostgreSQLParser.g4  
-		{fmt.Sprintf("%sLexer.g4", capitalize(grammarName)), fmt.Sprintf("%sParser.g4", capitalize(grammarName))},
-		// Special case for PostgreSQL: postgresql -> PostgreSQL
-		{fmt.Sprintf("%sLexer.g4", strings.ToUpper(grammarName)), fmt.Sprintf("%sParser.g4", strings.ToUpper(grammarName))},
-		// Alternate patterns: CqlLexer.g4, CqlParser.g4  
-		{fmt.Sprintf("%sLexer.g4", strings.Title(grammarName)), fmt.Sprintf("%sParser.g4", strings.Title(grammarName))},
-		// Lowercase patterns: postgresql_lexer.g4, postgresql_parser.g4
-		{fmt.Sprintf("%s_lexer.g4", strings.ToLower(grammarName)), fmt.Sprintf("%s_parser.g4", strings.ToLower(grammarName))},
-	}
-	
-	// Special cases for known grammar naming conventions
-	switch strings.ToLower(grammarName) {
-	case "postgresql":
-		patterns = append(patterns, struct {
-			lexerPattern  string
-			parserPattern string
-		}{"PostgreSQLLexer.g4", "PostgreSQLParser.g4"})
-	case "antlrv4":
-		patterns = append(patterns, struct {
-			lexerPattern  string
-			parserPattern string
-		}{"ANTLRv4Lexer.g4", "ANTLRv4Parser.g4"})
-	}
-	
-	// Special case for ANTLR v4 self-grammar directory  
-	if strings.Contains(dir, "tools/grammar") {
-		patterns = append(patterns, struct {
-			lexerPattern  string
-			parserPattern string
-		}{"ANTLRv4Lexer.g4", "ANTLRv4Parser.g4"})
-	}
-	
-	for _, entry := range entries {
-		if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".g4") {
-			for _, pattern := range patterns {
-				if entry.Name() == pattern.lexerPattern {
-					lexerFile = filepath.Join(dir, entry.Name())
-				}
-				if entry.Name() == pattern.parserPattern {
-					parserFile = filepath.Join(dir, entry.Name())
-				}
-			}
-		}
-	}
-	
-	// Check if we found both files
-	if lexerFile == "" {
-		return nil, errors.Errorf("lexer file not found in %s", dir)
-	}
-	if parserFile == "" {
-		return nil, errors.Errorf("parser file not found in %s", dir)
-	}
-	
-	return &GrammarFiles{
-		LexerFile:  lexerFile,
-		ParserFile: parserFile,
-		Directory:  dir,
-	}, nil
-}
-
-// ListAvailableGrammars scans for all available grammar directories
-func ListAvailableGrammars() ([]string, error) {
-	currentDir, err := os.Getwd()
-	if err != nil {
-		return nil, errors.Wrap(err, "failed to get current directory")
-	}
-	
-	repoRoot := filepath.Join(currentDir, "..", "..")
-	
-	var grammars []string
-	
-	// Scan for grammar directories
-	entries, err := os.ReadDir(repoRoot)
-	if err != nil {
-		return nil, errors.Wrap(err, "failed to read repository root")
-	}
-	
-	for _, entry := range entries {
-		if entry.IsDir() {
-			dirPath := filepath.Join(repoRoot, entry.Name())
-			if hasGrammarFiles(dirPath) {
-				grammars = append(grammars, entry.Name())
-			}
-		}
-	}
-	
-	// Add special case for ANTLR v4 self-grammar
-	if hasGrammarFiles(filepath.Join(repoRoot, "tools", "grammar")) {
-		grammars = append(grammars, "antlrv4")
-	}
-	
-	return grammars, nil
-}
-
-// hasGrammarFiles checks if a directory contains .g4 files
-func hasGrammarFiles(dir string) bool {
-	entries, err := os.ReadDir(dir)
-	if err != nil {
-		return false
-	}
-	
-	var hasLexer, hasParser bool
-	for _, entry := range entries {
-		if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".g4") {
-			name := strings.ToLower(entry.Name())
-			if strings.Contains(name, "lexer") {
-				hasLexer = true
-			}
-			if strings.Contains(name, "parser") {
-				hasParser = true
-			}
-		}
-	}
-	
-	return hasLexer && hasParser
-}
-
-// capitalize capitalizes the first letter of a string, preserving the rest
-func capitalize(s string) string {
-	if len(s) == 0 {
-		return s
-	}
-	return strings.ToUpper(s[:1]) + s[1:]
-}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go
index 0e24435..70ceec2 100644
--- a/tools/fuzzing/internal/grammar/parser.go
+++ b/tools/fuzzing/internal/grammar/parser.go
@@ -283,11 +283,14 @@ func (v *GrammarExtractorVisitor) VisitRules(ctx grammar.IRulesContext) interfac
 
 // VisitRuleSpec visits a rule specification (could be parser or lexer rule)
 func (v *GrammarExtractorVisitor) VisitRuleSpec(ctx grammar.IRuleSpecContext) interface{} {
-	// Focus only on parser rules for now
+	// Handle parser rules
 	if parserRuleCtx := ctx.ParserRuleSpec(); parserRuleCtx != nil {
 		v.VisitParserRuleSpec(parserRuleCtx)
 	}
-	// Skip lexer rules for now
+	// Handle lexer rules
+	if lexerRuleCtx := ctx.LexerRuleSpec(); lexerRuleCtx != nil {
+		v.VisitLexerRuleSpec(lexerRuleCtx)
+	}
 	return nil
 }
 
@@ -322,6 +325,37 @@ func (v *GrammarExtractorVisitor) VisitParserRuleSpec(ctx grammar.IParserRuleSpe
 	return nil
 }
 
+// VisitLexerRuleSpec visits a lexer rule specification
+func (v *GrammarExtractorVisitor) VisitLexerRuleSpec(ctx grammar.ILexerRuleSpecContext) interface{} {
+	// Get rule name
+	ruleNameToken := ctx.TOKEN_REF()
+	if ruleNameToken == nil {
+		return nil
+	}
+	ruleName := ruleNameToken.GetText()
+
+	// Get lexer rule block (alternatives)
+	lexerRuleBlockCtx := ctx.LexerRuleBlock()
+	if lexerRuleBlockCtx == nil {
+		return nil
+	}
+
+	// Extract alternatives from lexer rule block
+	alternatives := v.extractLexerAlternatives(lexerRuleBlockCtx)
+
+	// Create rule
+	rule := &Rule{
+		Name:         ruleName,
+		IsLexer:      true,
+		Alternatives: alternatives,
+	}
+
+	// Store rule
+	v.lexerRules[ruleName] = rule
+
+	return nil
+}
+
 // extractAlternatives extracts alternatives from a rule block
 func (v *GrammarExtractorVisitor) extractAlternatives(ruleBlockCtx grammar.IRuleBlockContext) []Alternative {
 	var alternatives []Alternative
@@ -341,6 +375,46 @@ func (v *GrammarExtractorVisitor) extractAlternatives(ruleBlockCtx grammar.IRule
 	return alternatives
 }
 
+// extractLexerAlternatives extracts alternatives from a lexer rule block
+func (v *GrammarExtractorVisitor) extractLexerAlternatives(lexerRuleBlockCtx grammar.ILexerRuleBlockContext) []Alternative {
+	var alternatives []Alternative
+
+	// Get lexer alternative list
+	lexerAltListCtx := lexerRuleBlockCtx.LexerAltList()
+	if lexerAltListCtx == nil {
+		return alternatives
+	}
+
+	// Process each lexer alternative
+	for _, lexerAltCtx := range lexerAltListCtx.AllLexerAlt() {
+		alternative := v.extractLexerAlternative(lexerAltCtx)
+		alternatives = append(alternatives, alternative)
+	}
+
+	return alternatives
+}
+
+// extractLexerAlternative extracts a single lexer alternative
+func (v *GrammarExtractorVisitor) extractLexerAlternative(lexerAltCtx grammar.ILexerAltContext) Alternative {
+	var elements []Element
+
+	// Get lexer elements context
+	lexerElementsCtx := lexerAltCtx.LexerElements()
+	if lexerElementsCtx != nil {
+		// Process each lexer element
+		for _, lexerElementCtx := range lexerElementsCtx.AllLexerElement() {
+			element := v.extractLexerElement(lexerElementCtx)
+			if element != nil {
+				elements = append(elements, *element)
+			}
+		}
+	}
+
+	return Alternative{
+		Elements: elements,
+	}
+}
+
 // extractAlternative extracts a single alternative
 func (v *GrammarExtractorVisitor) extractAlternative(labeledAltCtx grammar.ILabeledAltContext) Alternative {
 	var elements []Element
@@ -387,6 +461,148 @@ func (v *GrammarExtractorVisitor) extractElement(elementCtx grammar.IElementCont
 	return nil
 }
 
+// extractLexerElement converts a lexer element (an atom or a block, plus its EBNF suffix) into an Element; it returns nil for action blocks and anything it does not recognise
+func (v *GrammarExtractorVisitor) extractLexerElement(lexerElementCtx grammar.ILexerElementContext) *Element {
+	// Handle lexer atoms (character ranges, terminals, etc.)
+	if lexerAtomCtx := lexerElementCtx.LexerAtom(); lexerAtomCtx != nil {
+		element := v.extractLexerAtom(lexerAtomCtx)
+		// The suffix hangs off the element context, not the atom, so the quantifier is read from there
+		if element != nil {
+			element.Quantifier = v.extractQuantifier(lexerElementCtx.EbnfSuffix())
+		}
+		return element
+	}
+
+	// Handle lexer blocks (grouped alternatives)
+	if lexerBlockCtx := lexerElementCtx.LexerBlock(); lexerBlockCtx != nil {
+		element := v.extractLexerBlock(lexerBlockCtx)
+		// Same suffix handling as for atoms
+		if element != nil {
+			element.Quantifier = v.extractQuantifier(lexerElementCtx.EbnfSuffix())
+		}
+		return element
+	}
+
+	// Handle action blocks (for now, just return nil as they don't generate text)
+	if lexerElementCtx.ActionBlock() != nil {
+		// Action blocks don't contribute to generated text, so we skip them
+		return nil
+	}
+
+	return nil
+}
+
+// extractLexerAtom converts a lexer atom into an Element, trying in order: terminal definition, character range, not set, character set, wildcard; it returns nil when none is present
+func (v *GrammarExtractorVisitor) extractLexerAtom(lexerAtomCtx grammar.ILexerAtomContext) *Element {
+	// Handle terminal definition (string literal or token reference)
+	if terminalDefCtx := lexerAtomCtx.TerminalDef(); terminalDefCtx != nil {
+		return v.extractTerminalDef(terminalDefCtx)
+	}
+
+	// Handle character range (e.g., 'a'..'z')
+	if characterRangeCtx := lexerAtomCtx.CharacterRange(); characterRangeCtx != nil {
+		return v.extractCharacterRange(characterRangeCtx)
+	}
+
+	// Handle not set (e.g., ~[abc])
+	if notSetCtx := lexerAtomCtx.NotSet(); notSetCtx != nil {
+		return v.extractNotSet(notSetCtx)
+	}
+
+	// Handle lexer character set (e.g., [abc] or [a-z]); the token text is stored verbatim as a literal
+	if lexerCharSetToken := lexerAtomCtx.LEXER_CHAR_SET(); lexerCharSetToken != nil {
+		return &Element{
+			Value: LiteralValue{Text: lexerCharSetToken.GetText()},
+		}
+	}
+
+	// Handle wildcard (.)
+	if wildcardCtx := lexerAtomCtx.Wildcard(); wildcardCtx != nil {
+		return &Element{
+			Value: WildcardValue{},
+		}
+	}
+
+	return nil
+}
+
+// extractLexerBlock converts a lexer block (grouped alternatives) into a BlockValue element; every call bumps globalBlockID to derive the block ID and records the alternatives in v.blockAltMap
+func (v *GrammarExtractorVisitor) extractLexerBlock(lexerBlockCtx grammar.ILexerBlockContext) *Element {
+	// A block without an alternative list is still registered, with an empty (non-nil) alternatives slice
+	lexerAltListCtx := lexerBlockCtx.LexerAltList()
+	if lexerAltListCtx == nil {
+		globalBlockID++
+		blockID := fmt.Sprintf("lexer_block_%d_alts", globalBlockID)
+		emptyAlts := []Alternative{}
+		v.blockAltMap[blockID] = emptyAlts
+		
+		return &Element{
+			Value: BlockValue{ID: blockID, Alternatives: emptyAlts},
+		}
+	}
+
+	// An alternative list with no entries is handled exactly like a missing list
+	lexerAlts := lexerAltListCtx.AllLexerAlt()
+	if len(lexerAlts) == 0 {
+		globalBlockID++
+		blockID := fmt.Sprintf("lexer_block_%d_alts", globalBlockID)
+		emptyAlts := []Alternative{}
+		v.blockAltMap[blockID] = emptyAlts
+		
+		return &Element{
+			Value: BlockValue{ID: blockID, Alternatives: emptyAlts},
+		}
+	}
+
+	// Convert each alternative; elements that extractLexerElement returns as nil are dropped
+	blockAlternatives := []Alternative{}
+	for _, lexerAltCtx := range lexerAlts {
+		elements := []Element{}
+		if lexerElementsCtx := lexerAltCtx.LexerElements(); lexerElementsCtx != nil {
+			for _, lexerElementCtx := range lexerElementsCtx.AllLexerElement() {
+				element := v.extractLexerElement(lexerElementCtx)
+				if element != nil {
+					elements = append(elements, *element)
+				}
+			}
+		}
+		blockAlternatives = append(blockAlternatives, Alternative{Elements: elements})
+	}
+	
+	// The ID is taken after nested blocks were extracted above, so nested blocks receive lower IDs than their parent
+	globalBlockID++
+	blockID := fmt.Sprintf("lexer_block_%d_alts", globalBlockID)
+	v.blockAltMap[blockID] = blockAlternatives
+	
+	return &Element{
+		Value: BlockValue{ID: blockID, Alternatives: blockAlternatives},
+	}
+}
+
+// extractCharacterRange extracts a character range (e.g., 'a'..'z') as a literal of the form <start>..<end>; it returns nil unless the context holds exactly two string literals
+func (v *GrammarExtractorVisitor) extractCharacterRange(characterRangeCtx grammar.ICharacterRangeContext) *Element {
+	// The two token texts are joined verbatim with ".." (no unquoting or unescaping happens here)
+	stringLiterals := characterRangeCtx.AllSTRING_LITERAL()
+	if len(stringLiterals) == 2 {
+		startChar := stringLiterals[0].GetText()
+		endChar := stringLiterals[1].GetText()
+		rangeText := fmt.Sprintf("%s..%s", startChar, endChar)
+		return &Element{
+			Value: LiteralValue{Text: rangeText},
+		}
+	}
+	return nil
+}
+
+// extractNotSet extracts a not set; bracket sets keep their real text (e.g., ~[abc]) so the excluded characters reach the generator, every other form falls back to the "~[...]" placeholder
+func (v *GrammarExtractorVisitor) extractNotSet(notSetCtx grammar.INotSetContext) *Element {
+	text := notSetCtx.GetText()
+	if len(text) < 4 || text[:2] != "~[" || text[len(text)-1] != ']' {
+		text = "~[...]" // ~'x', ~TOKEN and ~(a|b) are not modelled yet, so they keep the previous placeholder
+	}
+	return &Element{Value: LiteralValue{Text: text}}
+}
+
 // extractLabeledElement extracts a labeled element (e.g., label=atom)
 func (v *GrammarExtractorVisitor) extractLabeledElement(labeledElementCtx grammar.ILabeledElementContext) *Element {
 	// For now, just extract the atom part and ignore the label
diff --git a/tools/fuzzing/internal/grammar/parser_test.go b/tools/fuzzing/internal/grammar/parser_test.go
index 12ffa54..e3ee4cb 100644
--- a/tools/fuzzing/internal/grammar/parser_test.go
+++ b/tools/fuzzing/internal/grammar/parser_test.go
@@ -216,4 +216,195 @@ func createTempGrammarFile(t *testing.T, content string) string {
 	}
 
 	return tmpFile
+}
+
+// TestLexerRuleParsing parses a lexer-only grammar and checks the rule counts plus the alternatives, element values, quantifiers and value types of selected rules
+func TestLexerRuleParsing(t *testing.T) {
+	grammarContent := `
+lexer grammar TestLexer;
+
+// Simple string literal
+SELECT: 'SELECT';
+
+// Character range
+LETTER: [a-zA-Z];
+
+// Complex rule with alternatives and quantifiers
+IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;
+
+// Rule with character set
+DIGIT: [0-9];
+
+// Rule with wildcard and quantifier
+COMMENT: '//' .*? '\n';
+`
+
+	tmpFile := createTempGrammarFile(t, grammarContent)
+	defer os.Remove(tmpFile)
+
+	grammar, err := ParseGrammarFile(tmpFile)
+	if err != nil {
+		t.Fatalf("Failed to parse lexer grammar: %v", err)
+	}
+
+	// Basic grammar properties: no parser rules, all five lexer rules present
+	if grammar == nil {
+		t.Fatal("Grammar is nil")
+	}
+	if len(grammar.ParserRules) != 0 {
+		t.Errorf("Expected 0 parser rules, got %d", len(grammar.ParserRules))
+	}
+	if len(grammar.LexerRules) != 5 {
+		t.Errorf("Expected 5 lexer rules, got %d", len(grammar.LexerRules))
+	}
+
+	// Per-rule expectations; COMMENT is only covered by the rule count above, its elements are not inspected
+	tests := []struct {
+		ruleName     string
+		alternatives int
+		elements     []elementTest
+	}{
+		{
+			ruleName:     "SELECT",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "'SELECT'", quantifier: NONE, elementType: "literal"},
+			},
+		},
+		{
+			ruleName:     "LETTER",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "[a-zA-Z]", quantifier: NONE, elementType: "literal"},
+			},
+		},
+		{
+			ruleName:     "IDENTIFIER",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "[a-zA-Z_]", quantifier: NONE, elementType: "literal"},
+				{altIndex: 0, elementIndex: 1, value: "[a-zA-Z0-9_]", quantifier: ZERO_MORE, elementType: "literal"},
+			},
+		},
+		{
+			ruleName:     "DIGIT",
+			alternatives: 1,
+			elements: []elementTest{
+				{altIndex: 0, elementIndex: 0, value: "[0-9]", quantifier: NONE, elementType: "literal"},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.ruleName, func(t *testing.T) {
+			rule := grammar.GetRule(tc.ruleName)
+			if rule == nil {
+				t.Fatalf("rule %s not found", tc.ruleName)
+			}
+			if rule.Name != tc.ruleName || !rule.IsLexer {
+				t.Errorf("rule %s has incorrect metadata: IsLexer=%v", tc.ruleName, rule.IsLexer)
+			}
+			if len(rule.Alternatives) != tc.alternatives {
+				t.Errorf("%s: expected %d alternatives, got %d", tc.ruleName, tc.alternatives, len(rule.Alternatives))
+			}
+
+			for _, elem := range tc.elements {
+				altIndex := elem.altIndex
+				elementIndex := elem.elementIndex
+
+				if altIndex >= len(rule.Alternatives) {
+					t.Errorf("%s: alternative %d out of range", tc.ruleName, altIndex)
+					continue
+				}
+
+				elements := rule.Alternatives[altIndex].Elements
+				if elementIndex >= len(elements) {
+					t.Errorf("%s alt %d: element %d out of range", tc.ruleName, altIndex, elementIndex)
+					continue
+				}
+
+				element := elements[elementIndex]
+				if elem.value != "" && element.Value.String() != elem.value {
+					t.Errorf("%s alt %d elem %d: expected value %s, got %s", tc.ruleName, altIndex, elementIndex, elem.value, element.Value.String())
+				}
+				if element.Quantifier != elem.quantifier {
+					t.Errorf("%s alt %d elem %d: expected quantifier %v, got %v", tc.ruleName, altIndex, elementIndex, elem.quantifier, element.Quantifier)
+				}
+				
+				// Validate the concrete value type; an elementType other than the three cases below is not checked
+				switch elem.elementType {
+				case "literal":
+					if _, ok := element.Value.(LiteralValue); !ok {
+						t.Errorf("%s alt %d elem %d: expected LiteralValue, got %T", tc.ruleName, altIndex, elementIndex, element.Value)
+					}
+				case "reference":
+					if _, ok := element.Value.(ReferenceValue); !ok {
+						t.Errorf("%s alt %d elem %d: expected ReferenceValue, got %T", tc.ruleName, altIndex, elementIndex, element.Value)
+					}
+				case "block":
+					if _, ok := element.Value.(BlockValue); !ok {
+						t.Errorf("%s alt %d elem %d: expected BlockValue, got %T", tc.ruleName, altIndex, elementIndex, element.Value)
+					}
+				}
+			}
+		})
+	}
+}
+
+// TestCombinedGrammarParsing parses a combined grammar and checks that parser and lexer rules are counted separately, flagged correctly via IsLexer, and both returned by GetAllRules
+func TestCombinedGrammarParsing(t *testing.T) {
+	grammarContent := `
+grammar CombinedTest;
+
+// Parser rules
+statement: selectStmt;
+selectStmt: 'SELECT' IDENTIFIER;
+
+// Lexer rules
+IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;
+WS: [ \t\r\n]+ -> skip;
+`
+
+	tmpFile := createTempGrammarFile(t, grammarContent)
+	defer os.Remove(tmpFile)
+
+	grammar, err := ParseGrammarFile(tmpFile)
+	if err != nil {
+		t.Fatalf("Failed to parse combined grammar: %v", err)
+	}
+
+	// Basic grammar properties: two parser rules and two lexer rules
+	if grammar == nil {
+		t.Fatal("Grammar is nil")
+	}
+	if len(grammar.ParserRules) != 2 {
+		t.Errorf("Expected 2 parser rules, got %d", len(grammar.ParserRules))
+	}
+	if len(grammar.LexerRules) != 2 {
+		t.Errorf("Expected 2 lexer rules, got %d", len(grammar.LexerRules))
+	}
+
+	// Test parser rule
+	statement := grammar.GetRule("statement")
+	if statement == nil {
+		t.Fatal("Parser rule 'statement' not found")
+	}
+	if statement.IsLexer {
+		t.Error("Parser rule incorrectly marked as lexer rule")
+	}
+
+	// Test lexer rule
+	identifier := grammar.GetRule("IDENTIFIER")
+	if identifier == nil {
+		t.Fatal("Lexer rule 'IDENTIFIER' not found")
+	}
+	if !identifier.IsLexer {
+		t.Error("Lexer rule incorrectly marked as parser rule")
+	}
+
+	// Test that GetAllRules returns both types
+	allRules := grammar.GetAllRules()
+	if len(allRules) != 4 {
+		t.Errorf("Expected 4 total rules, got %d", len(allRules))
+	}
 }
\ No newline at end of file

From 192f56628f14535c334fff8b049480249bdcdf6b Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Thu, 28 Aug 2025 11:18:38 +0800
Subject: [PATCH 5/9] feat: generator for lexer rules

---
 .../fuzzing/internal/lexer/token_generator.go | 351 ++++++++++++++++++
 .../internal/lexer/token_generator_test.go    | 344 +++++++++++++++++
 2 files changed, 695 insertions(+)
 create mode 100644 tools/fuzzing/internal/lexer/token_generator.go
 create mode 100644 tools/fuzzing/internal/lexer/token_generator_test.go

diff --git a/tools/fuzzing/internal/lexer/token_generator.go b/tools/fuzzing/internal/lexer/token_generator.go
new file mode 100644
index 0000000..c201152
--- /dev/null
+++ b/tools/fuzzing/internal/lexer/token_generator.go
@@ -0,0 +1,351 @@
+package lexer
+
+import (
+	"fmt"
+	"math/rand"
+	"regexp"
+	"strings"
+
+	"github.com/bytebase/parser/tools/fuzzing/internal/grammar"
+)
+
+// TokenGenerator generates tokens from lexer rules, drawing every random choice from its own seeded source and honouring the limits in config
+type TokenGenerator struct {
+	random *rand.Rand
+	config *TokenGeneratorConfig
+}
+
+// TokenGeneratorConfig controls token generation behavior
+type TokenGeneratorConfig struct {
+	// MaxQuantifierCount limits how many times quantified elements repeat (a * element repeats 0 to MaxQuantifierCount times)
+	MaxQuantifierCount int
+	// MinQuantifierCount sets minimum repetitions for + quantifiers
+	MinQuantifierCount int
+	// OptionalProbability controls likelihood of including optional elements (0.0-1.0); 1.0 always includes them
+	OptionalProbability float64
+	// MaxDepth limits recursion depth to prevent infinite loops; generation returns an error once it is exceeded
+	MaxDepth int
+}
+
+// NewTokenGenerator creates a new token generator whose random choices are fully determined by seed.
+// A nil config selects the defaults below; a non-nil config is stored as passed (not copied).
+func NewTokenGenerator(seed int64, config *TokenGeneratorConfig) *TokenGenerator {
+	if config == nil {
+		// Defaults: up to 5 repetitions, at least 1 for +, optional elements kept 70% of the time, depth limit 10
+		config = &TokenGeneratorConfig{
+			MaxQuantifierCount:  5,
+			MinQuantifierCount:  1,
+			OptionalProbability: 0.7,
+			MaxDepth:            10,
+		}
+	}
+
+	return &TokenGenerator{random: rand.New(rand.NewSource(seed)), config: config}
+}
+
+// GenerateToken generates a token string from one randomly chosen alternative of a lexer rule.
+// It returns an error for a nil rule, a non-lexer rule, or a rule without alternatives.
+func (g *TokenGenerator) GenerateToken(rule *grammar.Rule) (string, error) {
+	if rule == nil {
+		return "", fmt.Errorf("rule is nil")
+	}
+	if !rule.IsLexer {
+		return "", fmt.Errorf("rule %s is not a lexer rule", rule.Name)
+	}
+	if len(rule.Alternatives) == 0 {
+		return "", fmt.Errorf("rule %s has no alternatives", rule.Name)
+	}
+
+	// Pick one alternative at random and expand it starting at depth 0
+	alternative := rule.Alternatives[g.random.Intn(len(rule.Alternatives))]
+	return g.generateFromAlternative(&alternative, 0)
+}
+
+// generateFromAlternative concatenates the text generated for each element of alt, in order; it fails when depth exceeds MaxDepth or when any element fails
+func (g *TokenGenerator) generateFromAlternative(alt *grammar.Alternative, depth int) (string, error) {
+	if depth > g.config.MaxDepth {
+		return "", fmt.Errorf("maximum depth exceeded")
+	}
+
+	var result strings.Builder
+	for _, element := range alt.Elements {
+		text, err := g.generateFromElement(&element, depth+1)
+		if err != nil {
+			return "", err
+		}
+		result.WriteString(text)
+	}
+	return result.String(), nil
+}
+
+// generateFromElement generates text from a single lexer element, applying its quantifier; a + element repeats between max(MinQuantifierCount, 1) and MaxQuantifierCount times
+func (g *TokenGenerator) generateFromElement(element *grammar.Element, depth int) (string, error) {
+	if depth > g.config.MaxDepth {
+		return "", fmt.Errorf("maximum depth exceeded")
+	}
+	switch element.Quantifier {
+	case grammar.OPTIONAL_Q: // ?
+		if g.random.Float64() > g.config.OptionalProbability {
+			return "", nil // Skip optional element
+		}
+		return g.generateElementContent(element, depth)
+	case grammar.ZERO_MORE: // *
+		return g.generateRepeated(element, g.random.Intn(g.config.MaxQuantifierCount+1), depth) // 0 to MaxQuantifierCount
+	case grammar.ONE_MORE: // +
+		lo, hi := g.config.MinQuantifierCount, g.config.MaxQuantifierCount
+		if lo < 1 {
+			lo = 1 // + always needs at least one repetition
+		}
+		if hi < lo {
+			hi = lo // keeps the Intn argument positive when the maximum is configured below the minimum
+		}
+		return g.generateRepeated(element, lo+g.random.Intn(hi-lo+1), depth)
+	default: // NONE
+		return g.generateElementContent(element, depth)
+	}
+}
+
+// generateRepeated concatenates count independently generated copies of the element's content (the quantifier is not applied again); a count of zero or less yields an empty string
+func (g *TokenGenerator) generateRepeated(element *grammar.Element, count int, depth int) (string, error) {
+	var result strings.Builder
+	for i := 0; i < count; i++ {
+		text, err := g.generateElementContent(element, depth)
+		if err != nil {
+			return "", err
+		}
+		result.WriteString(text)
+	}
+	return result.String(), nil
+}
+
+// generateElementContent generates the content for one occurrence of an element by dispatching on the concrete type of its value; unknown value types are an error
+func (g *TokenGenerator) generateElementContent(element *grammar.Element, depth int) (string, error) {
+	switch value := element.Value.(type) {
+	case grammar.LiteralValue:
+		return g.generateFromLiteral(value)
+	case grammar.BlockValue:
+		return g.generateFromBlock(value, depth)
+	case grammar.WildcardValue:
+		return g.generateFromWildcard()
+	case grammar.ReferenceValue:
+		// References (e.g. to fragment rules) are not resolved here;
+		// the name wrapped in angle brackets is emitted as a placeholder instead
+		return fmt.Sprintf("<%s>", value.Name), nil
+	default:
+		return "", fmt.Errorf("unsupported element value type: %T", value)
+	}
+}
+
+// generateFromLiteral generates text from a literal value: a quoted string, a negated set (~[...]), a character set ([...]) or a character range ('a'..'z')
+func (g *TokenGenerator) generateFromLiteral(literal grammar.LiteralValue) (string, error) {
+	text := literal.Text
+	// A range like 'a'..'z' is quoted at both ends too; inside a single string literal '..' can only sit at the very start or end
+	isRange := strings.Index(text, "'..'") > 0 && !strings.HasSuffix(text, "'..'")
+	if !isRange && len(text) >= 2 && text[0] == '\'' && text[len(text)-1] == '\'' {
+		return text[1 : len(text)-1], nil // String literal: remove quotes
+	}
+
+	// Handle negated sets like ~[...] FIRST (before checking for ..)
+	if strings.HasPrefix(text, "~[") && strings.HasSuffix(text, "]") {
+		return g.generateFromNegatedSet(text)
+	}
+
+	// Handle character sets like [a-zA-Z]
+	if len(text) >= 2 && text[0] == '[' && text[len(text)-1] == ']' {
+		return g.generateFromCharacterSet(text[1 : len(text)-1])
+	}
+
+	// Handle character ranges like 'a'..'z'
+	if strings.Contains(text, "..") {
+		return g.generateFromCharacterRange(text)
+	}
+
+	// Default: return the literal as-is
+	return text, nil
+}
+
+// generateFromCharacterSet picks one random character from the expansion of a set body such as a-zA-Z0-9_ (the caller strips the brackets); an empty expansion is an error
+func (g *TokenGenerator) generateFromCharacterSet(charset string) (string, error) {
+	chars, err := g.expandCharacterSet(charset)
+	if err != nil {
+		return "", err
+	}
+	if len(chars) == 0 {
+		return "", fmt.Errorf("empty character set")
+	}
+	
+	// Every expanded entry is equally likely, so a character listed twice is twice as likely to be chosen
+	index := g.random.Intn(len(chars))
+	return string(chars[index]), nil
+}
+
+// expandCharacterSet expands a character set body (escapes, a-z style ranges, single characters) into the runes it contains; a range whose start is above its end is an error
+func (g *TokenGenerator) expandCharacterSet(charset string) ([]rune, error) {
+	var chars []rune
+	set := []rune(charset) // index by rune, not byte, so multi-byte UTF-8 characters stay intact
+	i := 0
+	for i < len(set) {
+		// Handle escape sequences
+		if set[i] == '\\' && i+1 < len(set) {
+			switch set[i+1] {
+			case 'r':
+				chars = append(chars, '\r')
+			case 'n':
+				chars = append(chars, '\n')
+			case 't':
+				chars = append(chars, '\t')
+			case '\\':
+				chars = append(chars, '\\')
+			case '"':
+				chars = append(chars, '"')
+			case '\'':
+				chars = append(chars, '\'')
+			default:
+				// For unknown escapes, use the escaped character literally
+				chars = append(chars, set[i+1])
+			}
+			i += 2
+		} else if i+2 < len(set) && set[i+1] == '-' && set[i+2] != '\\' {
+			// Handle range like a-z (but not when second char is an escape)
+			start := set[i]
+			end := set[i+2]
+
+			if start > end {
+				return nil, fmt.Errorf("invalid character range: %c-%c", start, end)
+			}
+
+			for c := start; c <= end; c++ {
+				chars = append(chars, c)
+			}
+			i += 3
+		} else {
+			// Handle single character
+			chars = append(chars, set[i])
+			i++
+		}
+	}
+
+	return chars, nil
+}
+
+// generateFromCharacterRange generates one random character from a range like 'a'..'z' (both ends inclusive); each end must be exactly one character, which may be non-ASCII
+func (g *TokenGenerator) generateFromCharacterRange(rangeText string) (string, error) {
+	// Extract start and end characters from 'a'..'z' format
+	parts := strings.Split(rangeText, "..")
+	if len(parts) != 2 {
+		return "", fmt.Errorf("invalid character range format: %s", rangeText)
+	}
+
+	start := []rune(strings.Trim(parts[0], "'\""))
+	end := []rune(strings.Trim(parts[1], "'\""))
+
+	if len(start) != 1 || len(end) != 1 {
+		return "", fmt.Errorf("character range must be single characters: %s", rangeText)
+	}
+
+	startChar := start[0]
+	endChar := end[0]
+
+	if startChar > endChar {
+		return "", fmt.Errorf("invalid character range: %c > %c", startChar, endChar)
+	}
+
+	// Generate random character in range
+	rangeSize := int(endChar - startChar + 1)
+	offset := g.random.Intn(rangeSize)
+	result := startChar + rune(offset)
+
+	return string(result), nil
+}
+
+// generateFromBlock generates text from one randomly chosen alternative of a block; a block without alternatives produces an empty string and no error
+func (g *TokenGenerator) generateFromBlock(block grammar.BlockValue, depth int) (string, error) {
+	if len(block.Alternatives) == 0 {
+		return "", nil
+	}
+	
+	// Select a random alternative from the block
+	altIndex := g.random.Intn(len(block.Alternatives))
+	alternative := &block.Alternatives[altIndex]
+	
+	return g.generateFromAlternative(alternative, depth)
+}
+
+// generateFromWildcard generates a character for wildcard (.); it never returns a non-nil error
+func (g *TokenGenerator) generateFromWildcard() (string, error) {
+	// Generate a random printable ASCII character
+	// Range: 32-126 (space to tilde), i.e. 32 plus one of 95 offsets
+	char := rune(32 + g.random.Intn(95))
+	return string(char), nil
+}
+
+// generateFromNegatedSet generates a character NOT in the specified ~[...] set, chosen from ASCII letters, digits and a fixed list of punctuation; it fails when all of them are excluded
+func (g *TokenGenerator) generateFromNegatedSet(negatedSet string) (string, error) {
+	// Extract the character set from ~[...] format
+	if len(negatedSet) < 4 || !strings.HasPrefix(negatedSet, "~[") || !strings.HasSuffix(negatedSet, "]") {
+		return "", fmt.Errorf("invalid negated set format: %s", negatedSet)
+	}
+
+	charset := negatedSet[2 : len(negatedSet)-1] // Remove ~[ and ]
+
+	// Expand the excluded character set; %w keeps the cause available to errors.Is / errors.As
+	excludedChars, err := g.expandCharacterSet(charset)
+	if err != nil {
+		return "", fmt.Errorf("failed to expand excluded character set: %w", err)
+	}
+
+	// Create a map for quick lookup
+	excluded := make(map[rune]bool)
+	for _, c := range excludedChars {
+		excluded[c] = true
+	}
+
+	// Collect the candidates that are not excluded.
+	// The order below is fixed so that a given seed always yields the same character.
+	candidates := []rune{}
+
+	// Add letters
+	for c := 'a'; c <= 'z'; c++ {
+		if !excluded[c] {
+			candidates = append(candidates, c)
+		}
+	}
+	for c := 'A'; c <= 'Z'; c++ {
+		if !excluded[c] {
+			candidates = append(candidates, c)
+		}
+	}
+
+	// Add digits
+	for c := '0'; c <= '9'; c++ {
+		if !excluded[c] {
+			candidates = append(candidates, c)
+		}
+	}
+
+	// Add some special characters
+	specialChars := []rune{' ', '!', '#', '$', '%', '&', '*', '+', '/', '=', '?', '@', '^', '_', '`', '|', '~'}
+	for _, c := range specialChars {
+		if !excluded[c] {
+			candidates = append(candidates, c)
+		}
+	}
+
+	if len(candidates) == 0 {
+		return "", fmt.Errorf("no valid characters available (all excluded)")
+	}
+
+	// Select a random candidate
+	index := g.random.Intn(len(candidates))
+	return string(candidates[index]), nil
+}
+
+var validCharacterSetPattern = regexp.MustCompile(`^[a-zA-Z0-9_\-\[\]\\^]+$`) // compiled once, not on every ValidateCharacterSet call
+
+// ValidateCharacterSet returns an error when charset is empty or contains anything other than ASCII letters, digits, '_', '-', '[', ']', '\' or '^'
+func ValidateCharacterSet(charset string) error {
+	if !validCharacterSetPattern.MatchString(charset) {
+		return fmt.Errorf("invalid characters in character set: %s", charset)
+	}
+	return nil
+}
\ No newline at end of file
diff --git a/tools/fuzzing/internal/lexer/token_generator_test.go b/tools/fuzzing/internal/lexer/token_generator_test.go
new file mode 100644
index 0000000..c944268
--- /dev/null
+++ b/tools/fuzzing/internal/lexer/token_generator_test.go
@@ -0,0 +1,344 @@
+package lexer
+
+import (
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"testing"
+
+	"github.com/bytebase/parser/tools/fuzzing/internal/grammar"
+)
+
+// TestTokenGeneratorBasic parses single-rule lexer grammars and checks that every generated token satisfies the rule's validator; all subtests share one seeded generator
+func TestTokenGeneratorBasic(t *testing.T) {
+	config := &TokenGeneratorConfig{
+		MaxQuantifierCount:  3,
+		MinQuantifierCount:  1,
+		OptionalProbability: 1.0, // Always include optional elements for testing
+		MaxDepth:           5,
+	}
+	generator := NewTokenGenerator(12345, config)
+
+	tests := []struct {
+		ruleName    string
+		grammarText string
+		validator   func(string) bool
+		description string
+	}{
+		{
+			ruleName:    "SELECT",
+			grammarText: "SELECT: 'SELECT';",
+			validator:   func(s string) bool { return s == "SELECT" },
+			description: "simple string literal",
+		},
+		{
+			ruleName:    "LETTER",
+			grammarText: "LETTER: [a-z];",
+			validator:   func(s string) bool { return len(s) == 1 && s[0] >= 'a' && s[0] <= 'z' },
+			description: "single character range",
+		},
+		{
+			ruleName:    "DIGIT",
+			grammarText: "DIGIT: [0-9];",
+			validator:   func(s string) bool { return len(s) == 1 && s[0] >= '0' && s[0] <= '9' },
+			description: "digit character range",
+		},
+		{
+			ruleName:    "IDENTIFIER",
+			grammarText: "IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;",
+			validator: func(s string) bool {
+				if len(s) == 0 {
+					return false
+				}
+				// First character must be letter or underscore
+				first := s[0]
+				if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
+					return false
+				}
+				// Rest must be letters, digits, or underscore
+				for _, c := range s[1:] {
+					if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
+						return false
+					}
+				}
+				return true
+			},
+			description: "identifier with quantifier",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.description, func(t *testing.T) {
+			// Wrap the single rule in a minimal lexer grammar and write it to a temporary file
+			grammarContent := "lexer grammar Test;\n\n" + tt.grammarText
+			tmpFile := createTempGrammarFile(t, grammarContent)
+			defer os.Remove(tmpFile)
+
+			// Parse the grammar
+			parsedGrammar, err := grammar.ParseGrammarFile(tmpFile)
+			if err != nil {
+				t.Fatalf("Failed to parse grammar: %v", err)
+			}
+
+			// Get the rule
+			rule := parsedGrammar.GetRule(tt.ruleName)
+			if rule == nil {
+				t.Fatalf("Rule %s not found", tt.ruleName)
+			}
+
+			// Generate 10 tokens; each one must pass the validator
+			for i := 0; i < 10; i++ {
+				token, err := generator.GenerateToken(rule)
+				if err != nil {
+					t.Errorf("Failed to generate token: %v", err)
+					continue
+				}
+
+				if !tt.validator(token) {
+					t.Errorf("Generated token '%s' does not match expected pattern for %s", token, tt.description)
+				}
+			}
+		})
+	}
+}
+
+// TestQuantifierHandling tests the ?, * and + quantifiers; a subtest fails only when fewer than 10 of its 20 generated tokens satisfy the validator
+func TestQuantifierHandling(t *testing.T) {
+	config := &TokenGeneratorConfig{
+		MaxQuantifierCount:  5,
+		MinQuantifierCount:  2,
+		OptionalProbability: 0.5,
+		MaxDepth:           5,
+	}
+	generator := NewTokenGenerator(54321, config)
+
+	tests := []struct {
+		ruleName    string
+		grammarText string
+		validator   func(string) bool
+		description string
+	}{
+		{
+			ruleName:    "OPTIONAL",
+			grammarText: "OPTIONAL: 'A' 'B'?;",
+			validator: func(s string) bool {
+				return s == "A" || s == "AB"
+			},
+			description: "optional element with ?",
+		},
+		{
+			ruleName:    "ZERO_MORE",
+			grammarText: "ZERO_MORE: 'X' 'Y'*;",
+			validator: func(s string) bool {
+				if !strings.HasPrefix(s, "X") {
+					return false
+				}
+				rest := s[1:]
+				for _, c := range rest {
+					if c != 'Y' {
+						return false
+					}
+				}
+				return true
+			},
+			description: "zero or more with *",
+		},
+		{
+			ruleName:    "ONE_MORE",
+			grammarText: "ONE_MORE: 'Z' 'W'+;",
+			validator: func(s string) bool {
+				if !strings.HasPrefix(s, "Z") {
+					return false
+				}
+				rest := s[1:]
+				if len(rest) == 0 {
+					return false // + requires at least one
+				}
+				for _, c := range rest {
+					if c != 'W' {
+						return false
+					}
+				}
+				return true
+			},
+			description: "one or more with +",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.description, func(t *testing.T) {
+			// Wrap the single rule in a minimal lexer grammar and write it to a temporary file
+			grammarContent := "lexer grammar Test;\n\n" + tt.grammarText
+			tmpFile := createTempGrammarFile(t, grammarContent)
+			defer os.Remove(tmpFile)
+
+			// Parse the grammar
+			parsedGrammar, err := grammar.ParseGrammarFile(tmpFile)
+			if err != nil {
+				t.Fatalf("Failed to parse grammar: %v", err)
+			}
+
+			// Get the rule
+			rule := parsedGrammar.GetRule(tt.ruleName)
+			if rule == nil {
+				t.Fatalf("Rule %s not found", tt.ruleName)
+			}
+
+			// Generate 20 tokens and count how many pass; individual failures are only logged
+			validCount := 0
+			for i := 0; i < 20; i++ {
+				token, err := generator.GenerateToken(rule)
+				if err != nil {
+					t.Errorf("Failed to generate token: %v", err)
+					continue
+				}
+
+				if tt.validator(token) {
+					validCount++
+				} else {
+					t.Logf("Generated token '%s' for %s (validation failed but continuing)", token, tt.description)
+				}
+			}
+
+			// At least 50% of generated tokens should be valid
+			if validCount < 10 {
+				t.Errorf("Too few valid tokens generated (%d/20) for %s", validCount, tt.description)
+			}
+		})
+	}
+}
+
+// TestCharacterSetExpansion checks that expandCharacterSet returns exactly the expected runes, in order, for plain characters, ranges and mixtures of both
+func TestCharacterSetExpansion(t *testing.T) {
+	generator := NewTokenGenerator(9999, nil)
+
+	tests := []struct {
+		charset  string
+		expected []rune
+	}{
+		{"abc", []rune{'a', 'b', 'c'}},
+		{"a-c", []rune{'a', 'b', 'c'}},
+		{"0-2", []rune{'0', '1', '2'}},
+		{"a-cX", []rune{'a', 'b', 'c', 'X'}},
+		{"A-Z_", append(makeRange('A', 'Z'), '_')},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.charset, func(t *testing.T) {
+			result, err := generator.expandCharacterSet(tt.charset)
+			if err != nil {
+				t.Fatalf("Failed to expand character set '%s': %v", tt.charset, err)
+			}
+
+			if len(result) != len(tt.expected) {
+				t.Errorf("Expected %d characters, got %d", len(tt.expected), len(result))
+				return
+			}
+
+			for i, expected := range tt.expected {
+				if result[i] != expected {
+					t.Errorf("At position %d: expected '%c', got '%c'", i, expected, result[i])
+				}
+			}
+		})
+	}
+}
+
+// TestComplexLexerRules generates 10 tokens per multi-element lexer rule and requires that at least one of them matches the rule's regular expression
+func TestComplexLexerRules(t *testing.T) {
+	config := &TokenGeneratorConfig{
+		MaxQuantifierCount:  3,
+		MinQuantifierCount:  1,
+		OptionalProbability: 0.8,
+		MaxDepth:           10,
+	}
+	generator := NewTokenGenerator(11111, config)
+
+	grammarContent := `
+lexer grammar ComplexTest;
+
+// Complex identifier rule
+IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;
+
+// Number with optional decimal part
+NUMBER: [0-9]+ ('.' [0-9]+)?;
+
+// String with escaped quotes  
+STRING: '"' (~'"')* '"';
+
+// Comment line
+COMMENT: '//' (~[\r\n])*;
+`
+
+	tmpFile := createTempGrammarFile(t, grammarContent)
+	defer os.Remove(tmpFile)
+
+	parsedGrammar, err := grammar.ParseGrammarFile(tmpFile)
+	if err != nil {
+		t.Fatalf("Failed to parse complex grammar: %v", err)
+	}
+
+	tests := []struct {
+		ruleName string
+		pattern  string
+	}{
+		{"IDENTIFIER", `^[a-zA-Z_][a-zA-Z0-9_]*$`},
+		{"NUMBER", `^[0-9]+(\.[0-9]+)?$`},
+		{"STRING", `^"[^"]*"$`},
+		{"COMMENT", `^//.*$`},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.ruleName, func(t *testing.T) {
+			rule := parsedGrammar.GetRule(tt.ruleName)
+			if rule == nil {
+				t.Fatalf("Rule %s not found", tt.ruleName)
+			}
+
+			regex := regexp.MustCompile(tt.pattern)
+			validCount := 0
+
+			for i := 0; i < 10; i++ {
+				token, err := generator.GenerateToken(rule)
+				if err != nil {
+					t.Errorf("Failed to generate token for %s: %v", tt.ruleName, err)
+					continue
+				}
+
+				t.Logf("Generated token for %s: '%s'", tt.ruleName, token)
+
+				if regex.MatchString(token) {
+					validCount++
+				}
+			}
+
+			// A single matching token out of 10 is enough; non-matching tokens are tolerated
+			if validCount == 0 {
+				t.Errorf("No valid tokens generated for %s", tt.ruleName)
+			}
+		})
+	}
+}
+
+// Helper functions
+
+// createTempGrammarFile writes content to test_lexer.g4 in a fresh per-test temporary directory (cleaned up by the testing package) and returns the file's path
+func createTempGrammarFile(t *testing.T, content string) string {
+	t.Helper()
+	tmpFile := filepath.Join(t.TempDir(), "test_lexer.g4")
+
+	err := os.WriteFile(tmpFile, []byte(content), 0644)
+	if err != nil {
+		t.Fatalf("Failed to create temp grammar file: %v", err)
+	}
+	return tmpFile
+}
+
+func makeRange(start, end rune) []rune {
+	var runes []rune
+	for next := start; next <= end; next++ {
+		runes = append(runes, next)
+	}
+	return runes
+}
\ No newline at end of file

From 5692d2125d4efc711df12e00e64b8b6cd55f51aa Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Thu, 28 Aug 2025 11:31:23 +0800
Subject: [PATCH 6/9] chore: merge grammars

---
 tools/fuzzing/internal/generator/generator.go |  45 ++----
 tools/fuzzing/internal/grammar/parser.go      |  62 ++++++++
 tools/fuzzing/internal/grammar/parser_test.go | 147 ++++++++++++++++++
 3 files changed, 223 insertions(+), 31 deletions(-)

diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go
index 207c677..a4b7840 100644
--- a/tools/fuzzing/internal/generator/generator.go
+++ b/tools/fuzzing/internal/generator/generator.go
@@ -11,9 +11,9 @@ import (
 
 // Generator handles the fuzzing logic
 type Generator struct {
-	config   *config.Config
-	random   *rand.Rand
-	grammars []*grammar.ParsedGrammar
+	config  *config.Config
+	random  *rand.Rand
+	grammar *grammar.ParsedGrammar
 }
 
 // New creates a new generator with the given configuration
@@ -28,20 +28,18 @@ func New(cfg *config.Config) *Generator {
 func (g *Generator) Generate() error {
 	fmt.Println("Initializing grammar parser...")
 	
-	// Parse all grammar files
-	g.grammars = make([]*grammar.ParsedGrammar, len(g.config.GrammarFiles))
-	for i, filePath := range g.config.GrammarFiles {
-		parsedGrammar, err := grammar.ParseGrammarFile(filePath)
-		if err != nil {
-			return errors.Wrapf(err, "failed to parse grammar file %s", filePath)
-		}
-		g.grammars[i] = parsedGrammar
-		fmt.Printf("Parsed grammar file: %s\n", filePath)
+	// Parse and merge all grammar files into a single grammar
+	var err error
+	g.grammar, err = grammar.ParseAndMergeGrammarFiles(g.config.GrammarFiles)
+	if err != nil {
+		return errors.Wrap(err, "failed to parse and merge grammar files")
 	}
+	
+	fmt.Printf("Parsed and merged %d grammar files into single grammar\n", len(g.config.GrammarFiles))
 
 	// Validate start rule exists
-	if !g.hasRule(g.config.StartRule) {
-		return errors.Errorf("start rule '%s' not found in any grammar file", g.config.StartRule)
+	if g.grammar.GetRule(g.config.StartRule) == nil {
+		return errors.Errorf("start rule '%s' not found in merged grammar", g.config.StartRule)
 	}
 
 	fmt.Printf("Generating %d queries from rule '%s'...\n", g.config.Count, g.config.StartRule)
@@ -55,24 +53,9 @@ func (g *Generator) Generate() error {
 	return nil
 }
 
-// hasRule checks if a rule exists in any of the parsed grammars
-func (g *Generator) hasRule(ruleName string) bool {
-	for _, grammar := range g.grammars {
-		if grammar.GetRule(ruleName) != nil {
-			return true
-		}
-	}
-	return false
-}
-
-// getRule gets a rule from any of the parsed grammars
+// getRule gets a rule from the merged grammar
 func (g *Generator) getRule(ruleName string) *grammar.Rule {
-	for _, grammar := range g.grammars {
-		if rule := grammar.GetRule(ruleName); rule != nil {
-			return rule
-		}
-	}
-	return nil
+	return g.grammar.GetRule(ruleName)
 }
 
 // generateQuery creates a single query using grammar rules
diff --git a/tools/fuzzing/internal/grammar/parser.go b/tools/fuzzing/internal/grammar/parser.go
index 70ceec2..cd43d1c 100644
--- a/tools/fuzzing/internal/grammar/parser.go
+++ b/tools/fuzzing/internal/grammar/parser.go
@@ -191,6 +191,68 @@ func (g *ParsedGrammar) IsGeneratedBlock(name string) bool {
 	return exists
 }
 
+// MergeGrammar merges other into g, or returns an error (leaving g unchanged) if any name is defined in both.
+func (g *ParsedGrammar) MergeGrammar(other *ParsedGrammar) error {
+	// Reject every duplicate before touching g, so a failed merge cannot leave g half-merged.
+	for name := range other.LexerRules {
+		if _, exists := g.LexerRules[name]; exists {
+			return fmt.Errorf("duplicate lexer rule '%s' found in grammars '%s' and '%s'", name, g.FilePath, other.FilePath)
+		}
+	}
+	for name := range other.ParserRules {
+		if _, exists := g.ParserRules[name]; exists {
+			return fmt.Errorf("duplicate parser rule '%s' found in grammars '%s' and '%s'", name, g.FilePath, other.FilePath)
+		}
+	}
+	for blockID := range other.BlockAltMap {
+		if _, exists := g.BlockAltMap[blockID]; exists {
+			return fmt.Errorf("duplicate block ID '%s' found in grammars '%s' and '%s'", blockID, g.FilePath, other.FilePath)
+		}
+	}
+	// No conflicts: copy everything across, then mark FilePath as a merge of both sources.
+	for name, rule := range other.LexerRules {
+		g.LexerRules[name] = rule
+	}
+	for name, rule := range other.ParserRules {
+		g.ParserRules[name] = rule
+	}
+	for blockID, alternatives := range other.BlockAltMap {
+		g.BlockAltMap[blockID] = alternatives
+	}
+	if g.FilePath != other.FilePath {
+		g.FilePath = fmt.Sprintf("%s + %s", g.FilePath, other.FilePath)
+	}
+	return nil
+}
+
+// ParseAndMergeGrammarFiles parses each file in filePaths and folds them, in order, into one ParsedGrammar.
+func ParseAndMergeGrammarFiles(filePaths []string) (*ParsedGrammar, error) {
+	if len(filePaths) == 0 {
+		return nil, errors.New("no grammar files provided")
+	}
+
+	// The first file seeds the result; every later file is merged into it.
+	first, rest := filePaths[0], filePaths[1:]
+	merged, err := ParseGrammarFile(first)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to parse first grammar file %s", first)
+	}
+
+	// Stop at the first parse or merge failure, naming the offending file.
+	for _, nextPath := range rest {
+		parsed, parseErr := ParseGrammarFile(nextPath)
+		if parseErr != nil {
+			return nil, errors.Wrapf(parseErr, "failed to parse grammar file %s", nextPath)
+		}
+
+		if mergeErr := merged.MergeGrammar(parsed); mergeErr != nil {
+			return nil, errors.Wrapf(mergeErr, "failed to merge grammar file %s", nextPath)
+		}
+	}
+
+	return merged, nil
+}
+
 // IsRule checks if an element refers to another rule or generated block
 func (e *Element) IsRule() bool {
 	_, isRef := e.Value.(ReferenceValue)
diff --git a/tools/fuzzing/internal/grammar/parser_test.go b/tools/fuzzing/internal/grammar/parser_test.go
index e3ee4cb..15cb127 100644
--- a/tools/fuzzing/internal/grammar/parser_test.go
+++ b/tools/fuzzing/internal/grammar/parser_test.go
@@ -3,6 +3,7 @@ package grammar
 import (
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 )
 
@@ -218,6 +219,18 @@ func createTempGrammarFile(t *testing.T, content string) string {
 	return tmpFile
 }
 
+// createTempGrammarFileWithName writes content to filename inside a per-test temp dir and returns the file's path.
+func createTempGrammarFileWithName(t *testing.T, content string, filename string) string {
+	t.Helper()
+	tmpFile := filepath.Join(t.TempDir(), filename) // t.TempDir is unique per test, so fixed names cannot collide
+	err := os.WriteFile(tmpFile, []byte(content), 0644)
+	if err != nil {
+		t.Fatalf("Failed to create temp grammar file: %v", err)
+	}
+
+	return tmpFile
+}
+
 // TestLexerRuleParsing tests the parsing of lexer rules
 func TestLexerRuleParsing(t *testing.T) {
 	grammarContent := `
@@ -407,4 +420,138 @@ WS: [ \t\r\n]+ -> skip;
 	if len(allRules) != 4 {
 		t.Errorf("Expected 4 total rules, got %d", len(allRules))
 	}
+}
+
+// TestGrammarMerging checks that a parser grammar and a lexer grammar kept in separate files merge into one.
+func TestGrammarMerging(t *testing.T) {
+	// Parser half: two parser rules, one of which references the lexer's IDENTIFIER token.
+	const parserSrc = `
+parser grammar ParserTest;
+
+options {
+    tokenVocab = LexerTest;
+}
+
+statement: selectStmt;
+selectStmt: 'SELECT' IDENTIFIER;
+`
+
+	// Lexer half: the two lexer rules the merged grammar should gain.
+	const lexerSrc = `
+lexer grammar LexerTest;
+
+IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;
+WS: [ \t\r\n]+ -> skip;
+`
+
+	// Write both halves to disk under distinct names; the deferred removals delete them on return.
+	parserPath := createTempGrammarFileWithName(t, parserSrc, "test_parser.g4")
+	defer os.Remove(parserPath)
+
+	lexerPath := createTempGrammarFileWithName(t, lexerSrc, "test_lexer.g4")
+	defer os.Remove(lexerPath)
+
+	// Merge with the parser file first and the lexer file second.
+	sources := []string{parserPath, lexerPath}
+	merged, mergeErr := ParseAndMergeGrammarFiles(sources)
+	if mergeErr != nil {
+		t.Fatalf("Failed to parse and merge grammar files: %v", mergeErr)
+	}
+
+	// A nil result would make every later check dereference nil, so stop here.
+	if merged == nil {
+		t.Fatal("Merged grammar is nil")
+	}
+
+	// Each source file contributes exactly two rules of its own kind.
+	if got := len(merged.ParserRules); got != 2 {
+		t.Errorf("Expected 2 parser rules, got %d", got)
+	}
+
+	if got := len(merged.LexerRules); got != 2 {
+		t.Errorf("Expected 2 lexer rules, got %d", got)
+	}
+
+	// GetRule must find rules from either file, with IsLexer set only on the lexer rule.
+	if rule := merged.GetRule("statement"); rule == nil || rule.IsLexer {
+		t.Error("Parser rule 'statement' not found or incorrectly marked")
+	}
+
+	if rule := merged.GetRule("IDENTIFIER"); rule == nil || !rule.IsLexer {
+		t.Error("Lexer rule 'IDENTIFIER' not found or incorrectly marked")
+	}
+
+	// The merged grammar's FilePath is expected to contain a '+' once two files are combined.
+	mergedPath := merged.FilePath
+	if !strings.Contains(mergedPath, "+") {
+		t.Errorf("Expected merged file path to contain '+', got: %s", mergedPath)
+	}
+
+	// GetAllRules should report parser and lexer rules together: 2 + 2.
+	all := merged.GetAllRules()
+	if len(all) != 4 {
+		t.Errorf("Expected 4 total rules in merged grammar, got %d", len(all))
+	}
+}
+
+// TestGrammarMergingWithConflicts checks that merging two grammars that define the same rule name fails.
+func TestGrammarMergingWithConflicts(t *testing.T) {
+	// Both lexer grammars define IDENTIFIER, so the merge must be rejected.
+	grammar1Content := `
+lexer grammar Test1;
+IDENTIFIER: [a-z]+;
+`
+
+	grammar2Content := `
+lexer grammar Test2;
+IDENTIFIER: [A-Z]+;  // Conflict with first grammar
+`
+
+	tmpFile1 := createTempGrammarFileWithName(t, grammar1Content, "conflict1.g4")
+	defer os.Remove(tmpFile1)
+
+	tmpFile2 := createTempGrammarFileWithName(t, grammar2Content, "conflict2.g4")
+	defer os.Remove(tmpFile2)
+
+	// Merging must fail. Use Fatal on a nil err: the err.Error() call below would panic otherwise.
+	filePaths := []string{tmpFile1, tmpFile2}
+	_, err := ParseAndMergeGrammarFiles(filePaths)
+	if err == nil {
+		t.Fatal("Expected error when merging grammars with duplicate rule names")
+	}
+
+	if !strings.Contains(err.Error(), "duplicate") {
+		t.Errorf("Expected error about duplicate rules, got: %v", err)
+	}
+}
+
+// TestParseAndMergeGrammarFilesEdgeCases covers the empty-list and single-file inputs of ParseAndMergeGrammarFiles.
+func TestParseAndMergeGrammarFilesEdgeCases(t *testing.T) {
+	// An empty list of paths must be reported as an error.
+	if _, emptyErr := ParseAndMergeGrammarFiles([]string{}); emptyErr == nil {
+		t.Error("Expected error with empty file list")
+	}
+
+	// A single file has nothing to merge with and should come back with its own rules.
+	const singleSrc = `
+lexer grammar SingleTest;
+TOKEN: 'test';
+`
+
+	singlePath := createTempGrammarFileWithName(t, singleSrc, "single.g4")
+	defer os.Remove(singlePath)
+
+	parsed, parseErr := ParseAndMergeGrammarFiles([]string{singlePath})
+	if parseErr != nil {
+		t.Fatalf("Failed to parse single grammar file: %v", parseErr)
+	}
+
+	// The lone lexer rule must be counted and retrievable by name.
+	if got := len(parsed.LexerRules); got != 1 {
+		t.Errorf("Expected 1 lexer rule, got %d", got)
+	}
+
+	if parsed.GetRule("TOKEN") == nil {
+		t.Error("TOKEN rule not found in single file grammar")
+	}
+}
\ No newline at end of file

From 5659f1c09b90b68099015f57e835543c7beea46a Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Thu, 28 Aug 2025 11:32:43 +0800
Subject: [PATCH 7/9] chore: remove list grammar options

---
 tools/fuzzing/cmd/fuzzer/main.go | 49 +++++++-------------------------
 1 file changed, 11 insertions(+), 38 deletions(-)

diff --git a/tools/fuzzing/cmd/fuzzer/main.go b/tools/fuzzing/cmd/fuzzer/main.go
index dd62d7e..c388520 100644
--- a/tools/fuzzing/cmd/fuzzer/main.go
+++ b/tools/fuzzing/cmd/fuzzer/main.go
@@ -9,33 +9,31 @@ import (
 
 	"github.com/bytebase/parser/tools/fuzzing/internal/config"
 	"github.com/bytebase/parser/tools/fuzzing/internal/generator"
-	"github.com/bytebase/parser/tools/fuzzing/internal/grammar"
 )
 
 func main() {
 	cfg := parseFlags()
-	
+
 	if err := cfg.Validate(); err != nil {
 		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 		os.Exit(1)
 	}
-	
+
 	cfg.Print()
-	
+
 	gen := generator.New(cfg)
 	if err := gen.Generate(); err != nil {
 		fmt.Fprintf(os.Stderr, "Generation failed: %v\n", err)
 		os.Exit(1)
 	}
-	
+
 	fmt.Println("Generation completed successfully!")
 }
 
 func parseFlags() *config.Config {
 	cfg := &config.Config{}
-	var listGrammars bool
 	var grammarArg string
-	
+
 	flag.StringVar(&grammarArg, "grammar", "", "Grammar file(s): single file or comma-separated lexer,parser files")
 	flag.StringVar(&cfg.StartRule, "start-rule", "", "Starting grammar rule name")
 	flag.IntVar(&cfg.Count, "count", 10, "Number of queries to generate")
@@ -46,18 +44,15 @@ func parseFlags() *config.Config {
 	flag.IntVar(&cfg.QuantifierCount, "quantifier-count", 0, "Fixed count for all quantifiers (overrides min/max)")
 	flag.StringVar(&cfg.Output, "output", "", "Output file path (default: stdout)")
 	flag.Int64Var(&cfg.Seed, "seed", time.Now().UnixNano(), "Random seed for reproducible generation")
-	flag.BoolVar(&listGrammars, "list-grammars", false, "List all available grammars and exit")
-	
+
 	// Custom usage message
 	flag.Usage = func() {
 		fmt.Fprintf(os.Stderr, "Grammar-Aware Fuzzing Tool\n\n")
 		fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0])
 		fmt.Fprintf(os.Stderr, "Options:\n")
 		flag.PrintDefaults()
-		
+
 		fmt.Fprintf(os.Stderr, "\nExamples:\n")
-		fmt.Fprintf(os.Stderr, "  # List available grammars\n")
-		fmt.Fprintf(os.Stderr, "  %s --list-grammars\n\n", os.Args[0])
 		fmt.Fprintf(os.Stderr, "  # Single combined grammar file\n")
 		fmt.Fprintf(os.Stderr, "  %s --grammar combined.g4 --start-rule selectStmt --count 10\n\n", os.Args[0])
 		fmt.Fprintf(os.Stderr, "  # Separate lexer and parser files\n")
@@ -69,31 +64,9 @@ func parseFlags() *config.Config {
 		fmt.Fprintf(os.Stderr, "  # Output to file\n")
 		fmt.Fprintf(os.Stderr, "  %s --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 100 --output queries.sql\n\n", os.Args[0])
 	}
-	
+
 	flag.Parse()
-	
-	// Handle --list-grammars
-	if listGrammars {
-		grammars, err := grammar.ListAvailableGrammars()
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error listing grammars: %v\n", err)
-			os.Exit(1)
-		}
-		
-		fmt.Println("Available grammars:")
-		for _, g := range grammars {
-			files, err := grammar.DiscoverGrammarFiles(g)
-			if err != nil {
-				fmt.Printf("  %s (error: %v)\n", g, err)
-				continue
-			}
-			fmt.Printf("  %s\n", g)
-			fmt.Printf("    Lexer:  %s\n", files.LexerFile)
-			fmt.Printf("    Parser: %s\n", files.ParserFile)
-		}
-		os.Exit(0)
-	}
-	
+
 	// Parse grammar files from comma-separated argument
 	if grammarArg != "" {
 		files := strings.Split(grammarArg, ",")
@@ -103,6 +76,6 @@ func parseFlags() *config.Config {
 		}
 		cfg.GrammarFiles = files
 	}
-	
+
 	return cfg
-}
\ No newline at end of file
+}

From cf77e43c7d037d186e095897ecf736f75526d9d8 Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Fri, 29 Aug 2025 14:32:50 +0800
Subject: [PATCH 8/9] v1

---
 tools/fuzzing/DESIGN.md                       | 394 ------------------
 tools/fuzzing/Makefile                        |  27 +-
 tools/fuzzing/README.md                       | 124 ------
 tools/fuzzing/cmd/fuzzer/main.go              |  81 ----
 tools/fuzzing/internal/config/config.go       |  23 +
 tools/fuzzing/internal/generator/generator.go | 260 +++++++++++-
 tools/fuzzing/tests/postgresql_test.go        | 192 +++++++++
 tools/grammar/README.md                       |  36 --
 8 files changed, 461 insertions(+), 676 deletions(-)
 delete mode 100644 tools/fuzzing/DESIGN.md
 delete mode 100644 tools/fuzzing/README.md
 delete mode 100644 tools/fuzzing/cmd/fuzzer/main.go
 create mode 100644 tools/fuzzing/tests/postgresql_test.go
 delete mode 100644 tools/grammar/README.md

diff --git a/tools/fuzzing/DESIGN.md b/tools/fuzzing/DESIGN.md
deleted file mode 100644
index 5d05ad3..0000000
--- a/tools/fuzzing/DESIGN.md
+++ /dev/null
@@ -1,394 +0,0 @@
-# Grammar-Aware Fuzzing Tool Design
-
-## Overview
-
-A simple fuzzing tool that generates SQL inputs from ANTLR grammar rules to test parser performance on specific constructs.
-
-## Core Problems & Solutions
-
-### 1. Target Specific Rules
-**Problem**: Performance issues often occur in specific rules (e.g., `createProcedureStatement`)
-**Solution**: Allow users to specify starting rule chains
-
-```bash
-./fuzzer --grammar postgresql --start-rule createProcedureStatement --count 100
-./fuzzer --grammar cql --start-rule selectStatement.whereClause --count 50
-```
-
-### 2. Recursion Control  
-**Problem**: Grammar rules can be recursive, causing infinite loops during generation
-**Solution**: Limit recursion depth per rule (proven to handle all ANTLR recursion types)
-
-#### ANTLR 4 Recursion Types
-
-**Direct Left Recursion:**
-```antlr
-expr: expr '+' expr | INT    // expr directly refers to itself on left
-```
-
-**Direct Right Recursion:**
-```antlr
-expr: INT '+' expr | INT     // expr directly refers to itself on right  
-```
-
-**Indirect Recursion (Non-Left):**
-```antlr
-selectStmt: SELECT columns fromClause whereClause?
-whereClause: WHERE expr
-expr: '(' selectStmt ')' | INT   // Indirect: expr -> selectStmt -> whereClause -> expr
-```
-*Note: ANTLR 4 does NOT support mutually left recursive grammars. This example is valid because the recursion is not left-recursive (selectStmt doesn't start with selectStmt).*
-
-**Self-Recursion with Alternatives:**
-```antlr
-stmt: ifStmt | whileStmt | blockStmt
-blockStmt: '{' stmt* '}'         // blockStmt contains multiple stmt references
-```
-
-#### Why Depth Control Works
-
-**Theorem**: Any grammar rule expansion terminates in finite steps with depth limiting.
-
-**Proof by Contradiction:**
-1. Assume infinite expansion despite depth limit `D`
-2. Each recursive call increases depth: `depth(rule_n) = depth(rule_{n-1}) + 1`
-3. When `depth ≥ D`, generator forces terminal selection
-4. Therefore, maximum expansion depth is bounded by `D`
-5. Since each rule has finite alternatives and finite elements, total expansion is finite ∎
-
-#### Depth Control Implementation
-
-```go
-func (g *Generator) GenerateFromRule(ruleName string, currentDepth int) string {
-    // Base case: exceed depth limit -> force terminal
-    if currentDepth >= g.maxDepth {
-        return g.forceTerminal(ruleName)
-    }
-    
-    rule := g.grammar.GetRule(ruleName)
-    
-    // Prefer non-recursive alternatives as depth increases
-    alternative := g.selectAlternativeWithDepthBias(rule, currentDepth)
-    
-    result := ""
-    for _, element := range alternative {
-        if element.IsRule() {
-            // Recursive call with incremented depth
-            result += g.GenerateFromRule(element.Name, currentDepth+1)
-        } else {
-            result += element.Literal
-        }
-    }
-    return result
-}
-
-func (g *Generator) forceTerminal(ruleName string) string {
-    rule := g.grammar.GetRule(ruleName)
-    
-    // Find non-recursive alternatives (containing only terminals)
-    for _, alt := range rule.Alternatives {
-        if !alt.ContainsRecursion() {
-            return g.expandAlternative(alt, g.maxDepth)
-        }
-    }
-    
-    // Fallback: use default terminal for this rule type
-    return g.getDefaultTerminal(ruleName)
-}
-```
-
-#### Examples with Depth Control
-
-```bash
-./fuzzer --start-rule expr --max-depth 3 --count 5
-```
-
-**Generated sequences:**
-- Depth 0: `INT` (terminal)
-- Depth 1: `INT + INT` 
-- Depth 2: `(INT + INT) + INT`
-- Depth 3: `((INT + INT) + INT) + INT` (max depth reached)
-
-**Complex mutual recursion:**
-```bash  
-./fuzzer --start-rule selectStmt --max-depth 4 --count 3
-```
-
-**Expansion trace:**
-```
-selectStmt (depth=0)
-├── SELECT columns FROM table whereClause (depth=0)
-    └── whereClause (depth=1)  
-        └── WHERE expr (depth=1)
-            └── '(' selectStmt ')' (depth=2)
-                └── selectStmt (depth=2)
-                    └── SELECT columns FROM table (depth=2, no whereClause to avoid depth=4)
-```
-
-#### Depth Strategy Options
-
-**Conservative (Early Termination):**
-- Lower max depth (3-5)
-- Bias toward terminals as depth increases
-- Prevents deep nesting, faster generation
-
-**Aggressive (Deep Testing):**  
-- Higher max depth (10-15)
-- Equal probability until max depth
-- Tests parser limits, slower generation
-
-```bash
-# Conservative - quick, shallow testing
-./fuzzer --start-rule expr --max-depth 3 --depth-strategy conservative
-
-# Aggressive - deep parser stress testing  
-./fuzzer --start-rule createProcedureStmt --max-depth 12 --depth-strategy aggressive
-```
-
-### 3. Optional Rule Probability
-**Problem**: Optional rules (`selectStmt: SELECT columns FROM table whereClause?`) need probability control
-**Solution**: Configure probability for optional elements (standard in grammar-based fuzzing)
-
-### 4. Quantified Rule Generation
-**Problem**: Quantified rules (`stmt*`, `expr+`, `column{1,5}`) need count control
-**Solution**: Configure generation counts for quantified elements
-
-#### ANTLR 4 Quantifier Types
-
-**Zero or More (`rule*`):**
-```antlr
-blockStmt: '{' stmt* '}'        // Generate 0 to N statements
-selectList: column (',' column)*  // Generate 1 to N columns
-```
-
-**One or More (`rule+`):**  
-```antlr
-identifier: LETTER (LETTER | DIGIT)+  // Generate 1 to N characters
-```
-
-**Note**: ANTLR v4 does not support `{n}` or `{n,m}` quantifier syntax. These are regex-style quantifiers not supported in ANTLR grammar files.
-
-#### Quantifier Control Strategy
-
-**Count Distribution Options:**
-- **Uniform**: Equal probability for each count in range
-- **Exponential**: Higher probability for lower counts (realistic)  
-- **Fixed**: Always generate specific count
-
-```bash
-# Basic usage - user specifies max count
-./fuzzer --start-rule blockStmt --max-quantifier 10 --count 100
-
-# User controls both min and max for quantifiers  
-./fuzzer --start-rule selectList --min-quantifier 1 --max-quantifier 5 --count 50
-
-# Fixed count for performance testing
-./fuzzer --start-rule selectStmt --quantifier-count 100 --count 10
-```
-
-#### Implementation Logic
-
-```go
-type QuantifierConfig struct {
-    Strategy   string // "uniform", "exponential", "fixed"
-    MinRepeat  int    // Minimum repetitions (overrides grammar min)
-    MaxRepeat  int    // Maximum repetitions (overrides grammar max)  
-    FixedCount int    // Fixed count for "fixed" strategy
-}
-
-func (g *Generator) generateQuantified(element *GrammarElement, config QuantifierConfig) string {
-    var count int
-    
-    switch element.Quantifier {
-    case "*": // Zero or more
-        min := max(0, config.MinRepeat)
-        max := min(config.MaxRepeat, 50) // Reasonable default limit
-        count = g.selectCount(min, max, config.Strategy)
-        
-    case "+": // One or more  
-        min := max(1, config.MinRepeat)
-        max := min(config.MaxRepeat, 50)
-        count = g.selectCount(min, max, config.Strategy)
-        
-    // Note: ANTLR v4 does not support {n} or {min,max} syntax
-    }
-    
-    result := ""
-    for i := 0; i < count; i++ {
-        if element.IsRule() {
-            result += g.GenerateFromRule(element.RuleName, g.currentDepth+1)
-        } else {
-            result += element.Literal
-        }
-        
-        // Add separators for lists (e.g., comma-separated)
-        if i < count-1 && element.HasSeparator() {
-            result += element.Separator
-        }
-    }
-    return result
-}
-
-func (g *Generator) selectCount(min, max int, strategy string) int {
-    if min > max {
-        return min
-    }
-    
-    switch strategy {
-    case "fixed":
-        return min // Use minimum as fixed value
-        
-    case "uniform":
-        return min + g.random.Intn(max-min+1)
-        
-    case "exponential":
-        // Exponential decay: higher probability for lower counts
-        range_size := max - min + 1
-        // Generate exponentially distributed number, then map to range
-        lambda := 2.0 / float64(range_size)
-        exp_val := g.random.ExpFloat64() / lambda
-        count := min + int(exp_val)
-        if count > max {
-            count = max
-        }
-        return count
-        
-    default:
-        return min + g.random.Intn(max-min+1)
-    }
-}
-```
-
-#### Examples with Quantifier Control
-
-**Block statement with multiple statements:**
-```bash
-./fuzzer --start-rule blockStmt --quantifier-strategy exponential --max-repeat 8
-```
-**Generated:**
-- 70% chance: `{ stmt; }` (1 statement)
-- 20% chance: `{ stmt; stmt; }` (2 statements)  
-- 7% chance: `{ stmt; stmt; stmt; }` (3 statements)
-- 3% chance: 4+ statements
-
-**Column list generation:**
-```bash  
-./fuzzer --start-rule selectList --quantifier-strategy uniform --min-repeat 3 --max-repeat 7
-```
-**Generated:**
-- Equal probability: `col1, col2, col3` to `col1, col2, col3, col4, col5, col6, col7`
-
-**Performance testing with large lists:**
-```bash
-./fuzzer --start-rule selectStmt --quantifier-count 100 --count 5
-```
-**Generated:**
-- Always generates exactly 100 columns to test parser performance on large SELECT lists
-
-**Simple user control:**
-```bash
-./fuzzer --start-rule blockStmt --max-quantifier 3 --count 10
-```
-**Generated:**
-- `stmt*` generates 0-3 statements
-- `expr+` generates 1-3 expressions  
-- User controls maximum without complex strategy options
-
-```bash
-./fuzzer --start-rule selectStmt --optional-prob 0.7 --count 100
-# 70% chance to include optional whereClause
-```
-
-## Simple Architecture
-
-```
-tools/fuzzing/
-├── main.go              # CLI entry point
-├── generator.go         # Core generation logic
-└── grammar_parser.go    # Reuse tools/grammar/ 
-```
-
-## Core Logic
-
-```go
-type Generator struct {
-    grammar     *ParsedGrammar
-    maxDepth    int
-    optionalProb float64
-    random      *rand.Rand
-}
-
-func (g *Generator) GenerateFromRule(ruleName string, currentDepth int) string {
-    if currentDepth > g.maxDepth {
-        return g.generateTerminal() // Stop recursion
-    }
-    
-    rule := g.grammar.GetRule(ruleName)
-    alternative := g.selectAlternative(rule)
-    
-    result := ""
-    for _, element := range alternative {
-        if element.IsOptional() && g.random.Float64() > g.optionalProb {
-            continue // Skip optional element
-        }
-        if element.IsRule() {
-            result += g.GenerateFromRule(element.Name, currentDepth+1)
-        } else {
-            result += element.Literal
-        }
-    }
-    return result
-}
-```
-
-## CLI Interface
-
-```bash
-# Basic usage - generate from specific rule
-./fuzzer --grammar postgresql --start-rule selectStmt --count 10
-
-# Control recursion depth  
-./fuzzer --grammar cql --start-rule expr --max-depth 3 --count 5
-
-# Control optional probability
-./fuzzer --grammar postgresql --start-rule createStmt --optional-prob 0.8 --count 10
-
-# Control quantifier max count (for rule*, rule+)
-./fuzzer --grammar postgresql --start-rule blockStmt --max-quantifier 8 --count 20
-
-# Control all parameters together
-./fuzzer --grammar cql --start-rule selectStmt \
-  --max-depth 5 \
-  --optional-prob 0.7 \
-  --max-quantifier 10 \
-  --count 50
-
-# Output to file
-./fuzzer --grammar postgresql --start-rule selectStmt --count 100 --output queries.sql
-```
-
-## Implementation Steps
-
-### Step 1: Basic Generator
-- Parse grammar using existing `tools/grammar/`
-- Simple rule expansion with depth limit
-- CLI with `--start-rule`, `--max-depth`, `--count`
-
-### Step 2: Optional Control  
-- Add `--optional-prob` flag
-- Detect optional elements in grammar rules
-- Apply probability during generation
-
-### Step 3: Integration
-- Test generated queries against parsers
-- Add basic performance timing
-- CI integration for regression testing
-
-## Common Fuzzing Techniques Used
-
-1. **Grammar-based generation** - Generate from formal grammar rules
-2. **Depth limiting** - Prevent infinite recursion in recursive grammars  
-3. **Probability-based selection** - Control optional rule inclusion
-4. **Targeted fuzzing** - Focus on specific rule paths instead of full grammar
-
-This approach is much simpler but addresses your specific needs for testing parser performance on particular constructs.
\ No newline at end of file
diff --git a/tools/fuzzing/Makefile b/tools/fuzzing/Makefile
index 20503f2..227ca53 100644
--- a/tools/fuzzing/Makefile
+++ b/tools/fuzzing/Makefile
@@ -1,16 +1,6 @@
-BINARY_NAME=fuzzer
-BUILD_DIR=bin
-CMD_PATH=github.com/bytebase/parser/tools/fuzzing/cmd/fuzzer
+.PHONY: all test clean help
 
-.PHONY: all build test clean run help
-
-all: build test
-
-# Build the binary
-build:
-	@echo "Building $(BINARY_NAME)..."
-	@mkdir -p $(BUILD_DIR)
-	go build -o $(BUILD_DIR)/$(BINARY_NAME) $(CMD_PATH)
+all: test
 
 # Run tests
 test:
@@ -20,13 +10,8 @@ test:
 # Clean build artifacts  
 clean:
 	@echo "Cleaning..."
-	rm -rf $(BUILD_DIR)
 	go clean
 
-# Run the fuzzer (requires arguments)
-run:
-	go run $(CMD_PATH) $(ARGS)
-
 # Install dependencies
 deps:
 	@echo "Installing dependencies..."
@@ -45,15 +30,9 @@ lint:
 # Show help
 help:
 	@echo "Available targets:"
-	@echo "  build    - Build the fuzzer binary"
 	@echo "  test     - Run all tests"
 	@echo "  clean    - Clean build artifacts"
-	@echo "  run      - Run the fuzzer (use ARGS='--grammar postgresql --start-rule selectStmt')"
 	@echo "  deps     - Install/update dependencies"
 	@echo "  fmt      - Format all Go code"
 	@echo "  lint     - Run golangci-lint"
-	@echo "  help     - Show this help message"
-	@echo ""
-	@echo "Examples:"
-	@echo "  make run ARGS='--grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 5'"
-	@echo "  make run ARGS='--help'"
\ No newline at end of file
+	@echo "  help     - Show this help message"
\ No newline at end of file
diff --git a/tools/fuzzing/README.md b/tools/fuzzing/README.md
deleted file mode 100644
index f496ff1..0000000
--- a/tools/fuzzing/README.md
+++ /dev/null
@@ -1,124 +0,0 @@
-# Grammar-Aware Fuzzing Tool
-
-A fuzzing tool that generates valid SQL inputs from ANTLR v4 grammar files for parser testing.
-
-## Quick Start
-
-```bash
-# Build the fuzzer
-make build
-
-# List available grammars
-./bin/fuzzer --list-grammars
-
-# Single combined grammar file
-./bin/fuzzer --grammar combined.g4 --start-rule selectStmt --count 10
-
-# Separate lexer and parser files
-./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 10
-
-# Run with custom parameters  
-./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 3 --max-quantifier 8 --count 5
-```
-
-## Project Structure
-
-```
-tools/fuzzing/
-├── cmd/fuzzer/          # CLI application entry point
-│   └── main.go
-├── internal/            # Private application packages
-│   ├── config/          # Configuration management
-│   └── generator/       # Core fuzzing logic
-├── bin/                 # Built binaries (created by make build)
-├── Makefile            # Build and development tasks
-└── go.mod              # Go module definition
-```
-
-## CLI Options
-
-| Flag | Description | Default |
-|------|-------------|---------|
-| `--grammar` | Grammar file(s): single file or comma-separated lexer,parser | - |
-| `--start-rule` | Starting grammar rule (required) | - |
-| `--count` | Number of queries to generate | 10 |
-| `--max-depth` | Maximum recursion depth | 5 |
-| `--optional-prob` | Probability of optional elements (0.0-1.0) | 0.5 |
-| `--max-quantifier` | Maximum count for `*` and `+` quantifiers | 5 |
-| `--min-quantifier` | Minimum count override | 0 |
-| `--quantifier-count` | Fixed count for all quantifiers | 0 |
-| `--output` | Output file path | stdout |
-| `--seed` | Random seed for reproducible results | current time |
-
-## Examples
-
-### Basic Usage
-```bash
-# Single combined grammar file
-./bin/fuzzer --grammar combined.g4 --start-rule selectStmt --count 10
-
-# Separate lexer and parser files
-./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 10
-
-# Generate CQL expressions with limited depth
-./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 3 --count 5
-```
-
-### Performance Testing
-```bash
-# Generate queries with exactly 100 columns
-./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --quantifier-count 100 --count 5
-
-# Generate deeply nested expressions  
-./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 15 --count 10
-```
-
-### Output Control
-```bash
-# Save to file
-./bin/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 100 --output queries.sql
-
-# Reproducible generation
-./bin/fuzzer --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --seed 42 --count 10
-```
-
-## Development
-
-### Build Commands
-```bash
-# From tools/fuzzing directory
-make build    # Build binary to bin/fuzzer
-make test     # Run all tests  
-make clean    # Clean build artifacts
-make fmt      # Format code
-make deps     # Install/update dependencies (runs from repo root)
-
-# From repository root
-go build -o tools/fuzzing/bin/fuzzer github.com/bytebase/parser/tools/fuzzing/cmd/fuzzer
-```
-
-### Running During Development
-```bash
-# From tools/fuzzing directory
-make run ARGS='--grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 5'
-make run ARGS='--help'
-
-# From repository root
-go run github.com/bytebase/parser/tools/fuzzing/cmd/fuzzer --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 5
-```
-
-## Monolithic Repository Structure
-
-This tool uses the single `go.mod` file at the repository root:
-- **Module**: `github.com/bytebase/parser`
-- **Import path**: `github.com/bytebase/parser/tools/fuzzing/...`
-- **Dependencies**: Shared with other tools in the repository
-
-## Integration
-
-This tool is designed to integrate with:
-- Existing ANTLR v4 grammar parser at `tools/grammar/`
-- All parser implementations in the repository (postgresql, cql, redshift, etc.)
-- Shared CI/CD pipeline and testing infrastructure
-
-**TODO**: Grammar parser integration and actual query generation logic.
\ No newline at end of file
diff --git a/tools/fuzzing/cmd/fuzzer/main.go b/tools/fuzzing/cmd/fuzzer/main.go
deleted file mode 100644
index c388520..0000000
--- a/tools/fuzzing/cmd/fuzzer/main.go
+++ /dev/null
@@ -1,81 +0,0 @@
-package main
-
-import (
-	"flag"
-	"fmt"
-	"os"
-	"strings"
-	"time"
-
-	"github.com/bytebase/parser/tools/fuzzing/internal/config"
-	"github.com/bytebase/parser/tools/fuzzing/internal/generator"
-)
-
-func main() {
-	cfg := parseFlags()
-
-	if err := cfg.Validate(); err != nil {
-		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
-		os.Exit(1)
-	}
-
-	cfg.Print()
-
-	gen := generator.New(cfg)
-	if err := gen.Generate(); err != nil {
-		fmt.Fprintf(os.Stderr, "Generation failed: %v\n", err)
-		os.Exit(1)
-	}
-
-	fmt.Println("Generation completed successfully!")
-}
-
-func parseFlags() *config.Config {
-	cfg := &config.Config{}
-	var grammarArg string
-
-	flag.StringVar(&grammarArg, "grammar", "", "Grammar file(s): single file or comma-separated lexer,parser files")
-	flag.StringVar(&cfg.StartRule, "start-rule", "", "Starting grammar rule name")
-	flag.IntVar(&cfg.Count, "count", 10, "Number of queries to generate")
-	flag.IntVar(&cfg.MaxDepth, "max-depth", 5, "Maximum recursion depth")
-	flag.Float64Var(&cfg.OptionalProb, "optional-prob", 0.5, "Probability of including optional elements (0.0-1.0)")
-	flag.IntVar(&cfg.MaxQuantifier, "max-quantifier", 5, "Maximum count for quantified rules (* and +)")
-	flag.IntVar(&cfg.MinQuantifier, "min-quantifier", 0, "Minimum count for quantified rules (overrides grammar)")
-	flag.IntVar(&cfg.QuantifierCount, "quantifier-count", 0, "Fixed count for all quantifiers (overrides min/max)")
-	flag.StringVar(&cfg.Output, "output", "", "Output file path (default: stdout)")
-	flag.Int64Var(&cfg.Seed, "seed", time.Now().UnixNano(), "Random seed for reproducible generation")
-
-	// Custom usage message
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Grammar-Aware Fuzzing Tool\n\n")
-		fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0])
-		fmt.Fprintf(os.Stderr, "Options:\n")
-		flag.PrintDefaults()
-
-		fmt.Fprintf(os.Stderr, "\nExamples:\n")
-		fmt.Fprintf(os.Stderr, "  # Single combined grammar file\n")
-		fmt.Fprintf(os.Stderr, "  %s --grammar combined.g4 --start-rule selectStmt --count 10\n\n", os.Args[0])
-		fmt.Fprintf(os.Stderr, "  # Separate lexer and parser files\n")
-		fmt.Fprintf(os.Stderr, "  %s --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 10\n\n", os.Args[0])
-		fmt.Fprintf(os.Stderr, "  # Control recursion and quantifiers\n")
-		fmt.Fprintf(os.Stderr, "  %s --grammar cql/CqlLexer.g4,cql/CqlParser.g4 --start-rule expr --max-depth 3 --max-quantifier 8 --count 5\n\n", os.Args[0])
-		fmt.Fprintf(os.Stderr, "  # Performance testing\n")
-		fmt.Fprintf(os.Stderr, "  %s --grammar redshift/RedshiftLexer.g4,redshift/RedshiftParser.g4 --start-rule blockStmt --quantifier-count 100 --count 10\n\n", os.Args[0])
-		fmt.Fprintf(os.Stderr, "  # Output to file\n")
-		fmt.Fprintf(os.Stderr, "  %s --grammar postgresql/PostgreSQLLexer.g4,postgresql/PostgreSQLParser.g4 --start-rule selectStmt --count 100 --output queries.sql\n\n", os.Args[0])
-	}
-
-	flag.Parse()
-
-	// Parse grammar files from comma-separated argument
-	if grammarArg != "" {
-		files := strings.Split(grammarArg, ",")
-		// Trim whitespace from each file
-		for i, file := range files {
-			files[i] = strings.TrimSpace(file)
-		}
-		cfg.GrammarFiles = files
-	}
-
-	return cfg
-}
diff --git a/tools/fuzzing/internal/config/config.go b/tools/fuzzing/internal/config/config.go
index d976e37..3d4e27a 100644
--- a/tools/fuzzing/internal/config/config.go
+++ b/tools/fuzzing/internal/config/config.go
@@ -6,6 +6,28 @@ import (
 	"github.com/pkg/errors"
 )
 
+// OutputFormat selects how generated text is rendered.
+type OutputFormat int
+
+const (
+	// CompactOutput emits only the generated text, without rule-name comments (default).
+	CompactOutput OutputFormat = iota
+	// VerboseOutput prefixes each expanded rule's text with a /* ruleName */ comment.
+	VerboseOutput
+)
+
+// ParseOutputFormat converts a format name into an OutputFormat.
+// Only the exact string "verbose" selects VerboseOutput; "compact", the
+// empty string, and every unrecognized name yield CompactOutput.
+func ParseOutputFormat(s string) OutputFormat {
+	if s == "verbose" {
+		return VerboseOutput
+	}
+
+	// "compact", "" and unknown names all share the compact default.
+	return CompactOutput
+}
+
 // Config holds all configuration options for the fuzzer
 type Config struct {
 	GrammarFiles    []string // Can be one file (combined) or two files (lexer,parser)
@@ -17,6 +39,7 @@ type Config struct {
 	MinQuantifier   int
 	QuantifierCount int
 	Output          string
+	OutputFormat    OutputFormat // How to format the output
 	Seed            int64
 }
 
diff --git a/tools/fuzzing/internal/generator/generator.go b/tools/fuzzing/internal/generator/generator.go
index a4b7840..9eb2ea6 100644
--- a/tools/fuzzing/internal/generator/generator.go
+++ b/tools/fuzzing/internal/generator/generator.go
@@ -3,6 +3,7 @@ package generator
 import (
 	"fmt"
 	"math/rand"
+	"strings"
 
 	"github.com/bytebase/parser/tools/fuzzing/internal/config"
 	"github.com/bytebase/parser/tools/fuzzing/internal/grammar"
@@ -16,11 +17,19 @@ type Generator struct {
 	grammar *grammar.ParsedGrammar
 }
 
+// WorkItem represents a unit of work in the generation stack.
+type WorkItem struct {
+	RuleName string  // presumably the rule to expand — TODO confirm
+	Depth    int     // presumably the depth of that rule — TODO confirm
+	Result   *string // Pointer to where the result should be stored
+}
+
 // New creates a new generator with the given configuration
 func New(cfg *config.Config) *Generator {
 	return &Generator{
-		config: cfg,
-		random: rand.New(rand.NewSource(cfg.Seed)),
+		config:  cfg,
+		random:  rand.New(rand.NewSource(cfg.Seed)),
+		grammar: nil,
 	}
 }
 
@@ -58,18 +67,19 @@ func (g *Generator) getRule(ruleName string) *grammar.Rule {
 	return g.grammar.GetRule(ruleName)
 }
 
+
 // generateQuery creates a single query using grammar rules
 func (g *Generator) generateQuery(index int) string {
-	// Start generation from the specified start rule
+	// Start generation from the specified start rule at depth 0; MaxDepth is enforced in generateFromRule
 	result := g.generateFromRule(g.config.StartRule, 0)
 	return result
 }
 
-// generateFromRule recursively generates text from a grammar rule
+// generateFromRule generates text from a grammar rule
 func (g *Generator) generateFromRule(ruleName string, currentDepth int) string {
 	// Check depth limit to prevent infinite recursion
 	if currentDepth >= g.config.MaxDepth {
-		return g.generateTerminal(ruleName)
+		return fmt.Sprintf("<%s_MAX_DEPTH>", ruleName)
 	}
 
 	// Get the rule
@@ -96,7 +106,18 @@ func (g *Generator) generateFromRule(ruleName string, currentDepth int) string {
 		}
 	}
 
-	return fmt.Sprintf("/* %s */ %s", ruleName, joinWithSpaces(result))
+	// Format output based on configuration
+	switch g.config.OutputFormat {
+	case config.CompactOutput:
+		// Clean, readable output without verbose comments (default)
+		return joinWithSpaces(result)
+	case config.VerboseOutput:
+		// Full grammar rule traversal with comments
+		return fmt.Sprintf("/* %s */ %s", ruleName, joinWithSpaces(result))
+	default:
+		// Default to compact
+		return joinWithSpaces(result)
+	}
 }
 
 // generateFromElement generates text from a single grammar element
@@ -114,11 +135,11 @@ func (g *Generator) generateFromElement(element *grammar.Element, currentDepth i
 	// Generate single element
 	if element.IsRule() {
 		if refValue, ok := element.Value.(grammar.ReferenceValue); ok {
-			return g.generateFromRule(refValue.Name, currentDepth+1)
+			return g.generateFromRuleOrToken(refValue.Name, currentDepth+1)
 		} else if blockValue, ok := element.Value.(grammar.BlockValue); ok {
 			return g.generateFromBlock(blockValue, currentDepth)
 		}
-		return g.generateFromRule(element.Value.String(), currentDepth+1)
+		return g.generateFromRuleOrToken(element.Value.String(), currentDepth+1)
 	} else if element.IsTerminal() {
 		if litValue, ok := element.Value.(grammar.LiteralValue); ok {
 			return cleanLiteral(litValue.Text)
@@ -151,10 +172,13 @@ func (g *Generator) generateQuantified(element *grammar.Element, currentDepth in
 	for i := 0; i < count; i++ {
 		if element.IsRule() {
 			if refValue, ok := element.Value.(grammar.ReferenceValue); ok {
-				result := g.generateFromRule(refValue.Name, currentDepth+1)
+				result := g.generateFromRuleOrToken(refValue.Name, currentDepth+1)
+				results = append(results, result)
+			} else if blockValue, ok := element.Value.(grammar.BlockValue); ok {
+				result := g.generateFromBlock(blockValue, currentDepth+1)
 				results = append(results, result)
 			} else {
-				result := g.generateFromRule(element.Value.String(), currentDepth+1)
+				result := g.generateFromRuleOrToken(element.Value.String(), currentDepth+1)
 				results = append(results, result)
 			}
 		} else if element.IsTerminal() {
@@ -163,9 +187,6 @@ func (g *Generator) generateQuantified(element *grammar.Element, currentDepth in
 			} else {
 				results = append(results, cleanLiteral(element.Value.String()))
 			}
-		} else if blockValue, ok := element.Value.(grammar.BlockValue); ok {
-			result := g.generateFromBlock(blockValue, currentDepth+1)
-			results = append(results, result)
 		}
 	}
 
@@ -194,12 +215,217 @@ func (g *Generator) generateFromBlock(blockValue grammar.BlockValue, currentDept
 	return joinWithSpaces(result)
 }
 
-// generateTerminal generates a terminal when depth limit is reached
-func (g *Generator) generateTerminal(ruleName string) string {
-	// For depth-limited cases, return a simple placeholder
-	return fmt.Sprintf("<%s_TERM>", ruleName)
+
+// generateFromRuleOrToken dispatches on the kind of the named rule: a lexer
+// rule is expanded into a concrete token, anything else (including unknown
+// names) goes through parser-rule generation at the given depth.
+func (g *Generator) generateFromRuleOrToken(ruleName string, currentDepth int) string {
+	rule := g.grammar.GetRule(ruleName)
+	if rule == nil || !rule.IsLexer {
+		return g.generateFromRule(ruleName, currentDepth)
+	}
+	return g.generateConcreteToken(ruleName)
+}
+
+// generateConcreteToken produces concrete text for a lexer rule by expanding
+// it from depth 0. A name that is unknown or not a lexer rule yields the
+// placeholder "<name>" instead.
+func (g *Generator) generateConcreteToken(ruleName string) string {
+	if rule := g.grammar.GetRule(ruleName); rule != nil && rule.IsLexer {
+		// Lexer rules are expanded down to their terminals (literals and
+		// character sets), where concrete characters are chosen.
+		return g.generateFromLexerRule(rule, 0)
+	}
+
+	return fmt.Sprintf("<%s>", ruleName)
 }
 
+// generateFromLexerRule expands a lexer rule into text: one alternative is
+// chosen at random and the output of its elements is concatenated without
+// separators. A rule with no alternatives yields the empty string.
+// NOTE(review): currentDepth is forwarded but never checked against a limit
+// here, so a self-referencing lexer rule may recurse without bound — confirm.
+func (g *Generator) generateFromLexerRule(rule *grammar.Rule, currentDepth int) string {
+	alternatives := rule.Alternatives
+	if len(alternatives) == 0 {
+		return ""
+	}
+
+	chosen := alternatives[g.random.Intn(len(alternatives))]
+
+	// Emit the elements in order; empty results contribute nothing.
+	var token strings.Builder
+	for _, element := range chosen.Elements {
+		token.WriteString(g.generateFromLexerElement(&element, currentDepth))
+	}
+
+	return token.String()
+}
+
+// generateFromLexerElement produces text for one element of a lexer rule.
+// Optional elements may be skipped, quantified ones are repeated, rule
+// references and blocks are expanded, and terminals become concrete characters.
+func (g *Generator) generateFromLexerElement(element *grammar.Element, currentDepth int) string {
+	// Optional elements are dropped when the random draw exceeds OptionalProb.
+	if element.IsOptional() && g.random.Float64() > g.config.OptionalProb {
+		return ""
+	}
+	// Quantified elements (* and +) are repeated by a dedicated helper.
+	if element.IsQuantified() {
+		return g.generateQuantifiedLexer(element, currentDepth)
+	}
+
+	switch {
+	case element.IsRule():
+		switch value := element.Value.(type) {
+		case grammar.ReferenceValue:
+			target := g.grammar.GetRule(value.Name)
+			if target != nil && target.IsLexer {
+				return g.generateFromLexerRule(target, currentDepth+1)
+			}
+			// Parser rule - shouldn't happen in lexer context, but handle it
+			return g.generateFromRule(value.Name, currentDepth+1)
+		case grammar.BlockValue:
+			return g.generateFromLexerBlock(value, currentDepth)
+		}
+	case element.IsTerminal():
+		if literal, ok := element.Value.(grammar.LiteralValue); ok {
+			return g.generateFromLiteral(literal.Text)
+		}
+		return g.generateFromLiteral(element.Value.String())
+	}
+
+	return element.Value.String()
+}
+
+// generateQuantifiedLexer expands a quantified lexer element (* or +): the
+// element's value is generated count times and the non-empty pieces are
+// concatenated without separators.
+//
+// A positive QuantifierCount fixes count. Otherwise count is random: 0 to
+// MaxQuantifier for *, 1 to MaxQuantifier for +, and 1 for any other
+// quantifier. MaxQuantifier is clamped before the draw because rand.Intn
+// panics when its argument is not positive.
+func (g *Generator) generateQuantifiedLexer(element *grammar.Element, currentDepth int) string {
+	// The default of 1 covers quantifiers other than * and +.
+	count := 1
+	switch {
+	case g.config.QuantifierCount > 0:
+		count = g.config.QuantifierCount
+	case element.Quantifier == grammar.ZERO_MORE: // *
+		count = g.random.Intn(max(g.config.MaxQuantifier, 0) + 1)
+	case element.Quantifier == grammar.ONE_MORE: // +
+		count = 1 + g.random.Intn(max(g.config.MaxQuantifier, 1))
+	}
+
+	var results []string
+	for i := 0; i < count; i++ {
+		// Strip the quantifier so the callee emits exactly one occurrence.
+		single := grammar.Element{Value: element.Value, Quantifier: grammar.NONE}
+		if result := g.generateFromLexerElement(&single, currentDepth+1); result != "" {
+			results = append(results, result)
+		}
+	}
+
+	return strings.Join(results, "")
+}
+
+// generateFromLexerBlock expands a block (a nested group of alternatives)
+// found inside a lexer rule. One of the block's alternatives is picked at
+// random and the output of its elements is concatenated without separators.
+// A block with no alternatives yields the empty string.
+func (g *Generator) generateFromLexerBlock(blockValue grammar.BlockValue, currentDepth int) string {
+	choices := blockValue.Alternatives
+	if len(choices) == 0 {
+		return ""
+	}
+
+	picked := choices[g.random.Intn(len(choices))]
+
+	// Emit the elements in order; empty results contribute nothing.
+	// The depth is passed through unchanged for the block's own elements.
+	var text strings.Builder
+	for _, element := range picked.Elements {
+		text.WriteString(g.generateFromLexerElement(&element, currentDepth))
+	}
+
+	return text.String()
+}
+
+// generateFromLiteral turns the text of a lexer terminal into concrete output.
+// ~[...] and [...] are delegated to the set generators with the brackets
+// stripped; a single-quoted literal loses its quotes; a bare escape such as
+// \n becomes the character it names; anything else is returned unchanged.
+func (g *Generator) generateFromLiteral(literal string) string {
+	end := len(literal) - 1
+	switch {
+	case strings.HasPrefix(literal, "~[") && strings.HasSuffix(literal, "]"):
+		return g.generateFromNegatedSet(literal[2:end])
+	case strings.HasPrefix(literal, "[") && strings.HasSuffix(literal, "]"):
+		return g.generateFromCharacterSet(literal[1:end])
+	case end >= 1 && strings.HasPrefix(literal, "'") && strings.HasSuffix(literal, "'"):
+		return literal[1:end]
+	}
+
+	// Bare two-character escape sequences map to the character they denote.
+	switch literal {
+	case "\\r":
+		return "\r"
+	case "\\n":
+		return "\n"
+	case "\\t":
+		return "\t"
+	case "\\\"":
+		return "\""
+	case "\\'":
+		return "'"
+	case "\\\\":
+		return "\\"
+	}
+
+	return literal
+}
+
+// generateFromCharacterSet returns one random character from the body of a
+// character set such as a-zA-Z_ (the caller strips the surrounding brackets).
+// An empty set yields the fallback "x". Escapes like \n are not interpreted.
+func (g *Generator) generateFromCharacterSet(charset string) string {
+	// Index by rune, not byte, so non-ASCII members and ranges stay intact.
+	set := []rune(charset)
+	var chars []rune
+
+	for i := 0; i < len(set); {
+		if i+2 < len(set) && set[i+1] == '-' {
+			// Range such as a-z: add every code point from set[i] to set[i+2].
+			for r := set[i]; r <= set[i+2]; r++ {
+				chars = append(chars, r)
+			}
+			i += 3
+			continue
+		}
+		// Single character
+		chars = append(chars, set[i])
+		i++
+	}
+
+	if len(chars) == 0 {
+		return "x" // Fallback
+	}
+
+	return string(chars[g.random.Intn(len(chars))])
+}
+
+// generateFromNegatedSet returns a random character for a negated set such as ~[...].
+// NOTE: negatedSet is currently ignored, so the result may be an excluded character.
+func (g *Generator) generateFromNegatedSet(negatedSet string) string {
+	safeChars := []string{"a", "b", "c", "x", "y", "z", "_", "1", "2", "3"}
+	
+	// TODO: Implement proper negated set handling by expanding the set and excluding those characters
+	// For now, pick uniformly from the fixed candidate list above
+	return safeChars[g.random.Intn(len(safeChars))]
+}
+
+
 // cleanLiteral removes quotes from literal strings
 func cleanLiteral(literal string) string {
 	// Remove single quotes from literals like 'SELECT'
diff --git a/tools/fuzzing/tests/postgresql_test.go b/tools/fuzzing/tests/postgresql_test.go
new file mode 100644
index 0000000..fa067a4
--- /dev/null
+++ b/tools/fuzzing/tests/postgresql_test.go
@@ -0,0 +1,192 @@
+package tests
+
+import (
+	"fmt"
+	"path/filepath"
+	"runtime"
+	"testing"
+
+	"github.com/bytebase/parser/tools/fuzzing/internal/config"
+	"github.com/bytebase/parser/tools/fuzzing/internal/generator"
+)
+
+// getRepoRoot returns the repository root, three levels above this source
+// file's directory (tools/fuzzing/tests); runtime.Caller's ok result is ignored.
+func getRepoRoot() string {
+	_, filename, _, _ := runtime.Caller(0)
+	return filepath.Join(filepath.Dir(filename), "..", "..", "..")
+}
+
+// TestPostgreSQLSelectStmt generates selectstmt queries under several depth, optional-probability and seed settings; it fails only if Generate returns an error.
+func TestPostgreSQLSelectStmt(t *testing.T) {
+	repoRoot := getRepoRoot()
+	// PostgreSQL grammar file paths
+	lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4")
+	parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4")
+
+	tests := []struct {
+		name         string
+		startRule    string
+		count        int
+		maxDepth     int
+		optionalProb float64
+		seed         int64
+	}{
+		{
+			name:         "Simple SELECT statements",
+			startRule:    "selectstmt",
+			count:        3,
+			maxDepth:     5,
+			optionalProb: 0.7,
+			seed:         42,
+		},
+		{
+			name:         "Deep SELECT statements",
+			startRule:    "selectstmt",
+			count:        2,
+			maxDepth:     8,
+			optionalProb: 0.5,
+			seed:         123,
+		},
+		{
+			name:         "Minimal SELECT statements",
+			startRule:    "selectstmt",
+			count:        5,
+			maxDepth:     3,
+			optionalProb: 0.3,
+			seed:         456,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := &config.Config{
+				GrammarFiles:    []string{lexerPath, parserPath},
+				StartRule:       tt.startRule,
+				Count:           tt.count,
+				MaxDepth:        tt.maxDepth,
+				OptionalProb:    tt.optionalProb,
+				MaxQuantifier:   3,
+				MinQuantifier:   1,
+				QuantifierCount: 0,
+				OutputFormat:    config.CompactOutput,
+				Seed:            tt.seed,
+			}
+
+			fmt.Printf("\n=== %s ===\n", tt.name)
+			fmt.Printf("Config: MaxDepth=%d, OptionalProb=%.1f, Count=%d, Seed=%d\n", 
+				tt.maxDepth, tt.optionalProb, tt.count, tt.seed)
+			fmt.Println()
+
+			gen := generator.New(cfg)
+			err := gen.Generate()
+
+			if err != nil {
+				t.Errorf("Failed to generate PostgreSQL %s: %v", tt.startRule, err)
+			} else {
+				t.Logf("Successfully generated %d PostgreSQL %s statements", tt.count, tt.startRule)
+			}
+		})
+	}
+}
+
+// TestPostgreSQLExpressions generates five a_expr expressions with a fixed seed; it fails only if Generate returns an error.
+func TestPostgreSQLExpressions(t *testing.T) {
+	repoRoot := getRepoRoot()
+	// PostgreSQL grammar file paths
+	lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4")
+	parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4")
+
+	cfg := &config.Config{
+		GrammarFiles:    []string{lexerPath, parserPath},
+		StartRule:       "a_expr", // PostgreSQL expression rule
+		Count:           5,
+		MaxDepth:        4,
+		OptionalProb:    0.6,
+		MaxQuantifier:   2,
+		MinQuantifier:   1,
+		QuantifierCount: 0,
+		OutputFormat:    config.CompactOutput,
+		Seed:            789,
+	}
+
+	fmt.Printf("\n=== PostgreSQL Expressions ===\n")
+	fmt.Printf("Generating %d expressions with max depth %d\n", cfg.Count, cfg.MaxDepth)
+	fmt.Println()
+
+	gen := generator.New(cfg)
+	err := gen.Generate()
+
+	if err != nil {
+		t.Errorf("Failed to generate PostgreSQL expressions: %v", err)
+	} else {
+		t.Logf("Successfully generated %d PostgreSQL expressions", cfg.Count)
+	}
+}
+
+// TestPostgreSQLVerboseOutput runs selectstmt generation with config.VerboseOutput; it fails only if Generate returns an error.
+func TestPostgreSQLVerboseOutput(t *testing.T) {
+	repoRoot := getRepoRoot()
+	// PostgreSQL grammar file paths
+	lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4")
+	parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4")
+
+	cfg := &config.Config{
+		GrammarFiles:    []string{lexerPath, parserPath},
+		StartRule:       "selectstmt",
+		Count:           2,
+		MaxDepth:        4,
+		OptionalProb:    0.8,
+		MaxQuantifier:   2,
+		MinQuantifier:   1,
+		QuantifierCount: 0,
+		OutputFormat:    config.VerboseOutput, // Show rule traversal
+		Seed:            999,
+	}
+
+	fmt.Printf("\n=== PostgreSQL Verbose Output ===\n")
+	fmt.Printf("Generating with verbose output to show rule traversal\n")
+	fmt.Println()
+
+	gen := generator.New(cfg)
+	err := gen.Generate()
+
+	if err != nil {
+		t.Errorf("Failed to generate PostgreSQL statements with verbose output: %v", err)
+	} else {
+		t.Logf("Successfully generated PostgreSQL statements with verbose output")
+	}
+}
+
+// BenchmarkPostgreSQLGeneration times one Generate call per iteration (Count=1,
+// start rule selectstmt); generator construction is excluded via b.ResetTimer.
+func BenchmarkPostgreSQLGeneration(b *testing.B) {
+	repoRoot := getRepoRoot()
+	lexerPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLLexer.g4")
+	parserPath := filepath.Join(repoRoot, "postgresql", "PostgreSQLParser.g4")
+
+	cfg := &config.Config{
+		GrammarFiles:    []string{lexerPath, parserPath},
+		StartRule:       "selectstmt",
+		Count:           1,
+		MaxDepth:        6,
+		OptionalProb:    0.5,
+		MaxQuantifier:   3,
+		MinQuantifier:   1,
+		QuantifierCount: 0,
+		OutputFormat:    config.CompactOutput,
+		Seed:            42,
+	}
+
+	gen := generator.New(cfg)
+
+	// Reset the timer to exclude setup time
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		err := gen.Generate()
+		if err != nil {
+			b.Fatalf("Generation failed: %v", err)
+		}
+	}
+}
\ No newline at end of file
diff --git a/tools/grammar/README.md b/tools/grammar/README.md
deleted file mode 100644
index 2641250..0000000
--- a/tools/grammar/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# ANTLR v4 Grammar Parser
-
-A Go implementation to parse ANTLR v4 grammar files (`.g4` files) in this repository.
-
-## Source
-
-The lexer and parser grammars come from: https://github.com/antlr/grammars-v4/blob/master/antlr/antlr4
-
-## Why Custom NextToken()?
-
-We added `func (l *LexerAdaptor) NextToken() antlr.Token` in `lexer_adaptor.go` because:
-
-- ANTLR grammar parsing requires context-sensitive lexing
-- Need to convert `ID` tokens to `TOKEN_REF` (uppercase) or `RULE_REF` (lowercase)
-- Go ANTLR doesn't automatically call `Emit()` like Java ANTLR does
-- Go tokens are immutable, so we use a `TokenTypeWrapper` to override token types
-
-## Why Sed Command in Makefile?
-
-We added this sed command in the Makefile:
-```bash
-sed -i '' 's/l\.BaseLexer = antlr\.NewBaseLexer(input)/l.LexerAdaptor = *NewLexerAdaptor(input)/' antlrv4_lexer.go
-```
-
-Because:
-- ANTLR code generation creates `l.BaseLexer = antlr.NewBaseLexer(input)` 
-- We need `l.LexerAdaptor = *NewLexerAdaptor(input)` to use our custom lexer
-- This automatically fixes the generated constructor after each regeneration
-
-## Usage
-
-```bash
-make build  # Generate parser and apply fixes
-make test   # Test all .g4 files in repository (should show 100% success)
-make all    # Build and test
-```
\ No newline at end of file

From 029ad0c8e69a5acace5cd78cb38ce797e2f1d51f Mon Sep 17 00:00:00 2001
From: h3n4l <oysterdays@gmail.com>
Date: Fri, 29 Aug 2025 14:34:21 +0800
Subject: [PATCH 9/9] chore: go mod tidy

---
 go.mod | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 9e2614c..8cefc8a 100644
--- a/go.mod
+++ b/go.mod
@@ -4,12 +4,12 @@ go 1.24.5
 
 require (
 	github.com/antlr4-go/antlr/v4 v4.13.1
+	github.com/pkg/errors v0.9.1
 	github.com/stretchr/testify v1.10.0
 )
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
-	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect