Skip to content

Commit d7b1c66

Browse files
committed
feat(parser): Implement chunk-based CSS text scanner to avoid Tree-sitter memory explosions
- Extracted CSS parsing from html/analyzer.go into a dedicated css/analyzer.go.
- Replaced GLR AST generation with a linear, bracket-depth text scan.
- Capped oversized CSS rule chunks at 8 KB to prevent vector DB overload.
- Removed the old, unused css_regex.go implementation.
- Registered the new generic CSS parser globally in the daemon.

Resolves Trello Task 1, Task 2, Task 3.
1 parent fe56d6e commit d7b1c66

6 files changed

Lines changed: 160 additions & 173 deletions

File tree

cmd/rag-code-mcp/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616
)
1717

1818
var (
19-
Version = "2.1.78"
19+
Version = "2.1.79"
2020
Commit = "none"
2121
Date = "24.10.2025"
2222
)

internal/daemon/run.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/doITmagic/rag-code-mcp/internal/utils"
2424
"github.com/doITmagic/rag-code-mcp/pkg/indexer"
2525
"github.com/doITmagic/rag-code-mcp/pkg/llm"
26+
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/css"
2627
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/docs"
2728
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/go"
2829
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/html"

pkg/parser/css/analyzer.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
package css
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"path/filepath"
8+
"strings"
9+
10+
"github.com/doITmagic/rag-code-mcp/pkg/parser"
11+
)
12+
13+
func init() {
14+
parser.Register(NewAnalyzer())
15+
}
16+
17+
// Analyzer implements chunk-based processing of CSS/SCSS/LESS files.
// It does not depend on Tree-sitter, which is intended to keep it from running
// out of memory even on very large bundles.
type Analyzer struct{}

// NewAnalyzer returns a ready-to-use Analyzer.
func NewAnalyzer() *Analyzer {
	return new(Analyzer)
}

// Name returns the identifier of this analyzer, "css".
func (a *Analyzer) Name() string {
	const name = "css"
	return name
}

// CanHandle reports whether filePath ends in one of the stylesheet extensions
// .css, .scss, .less or .sass. The comparison ignores letter case.
func (a *Analyzer) CanHandle(filePath string) bool {
	switch strings.ToLower(filepath.Ext(filePath)) {
	case ".css", ".scss", ".less", ".sass":
		return true
	default:
		return false
	}
}
33+
34+
func (a *Analyzer) Analyze(ctx context.Context, path string) (*parser.Result, error) {
35+
content, err := os.ReadFile(path)
36+
if err != nil {
37+
return nil, fmt.Errorf("read css: %w", err)
38+
}
39+
40+
text := string(content)
41+
if strings.TrimSpace(text) == "" {
42+
return &parser.Result{Language: "css"}, nil
43+
}
44+
45+
var symbols []parser.Symbol
46+
47+
var selectorBuilder strings.Builder
48+
var contentBuilder strings.Builder
49+
braceDepth := 0
50+
startLine := 1
51+
currentLine := 1
52+
inComment := false
53+
54+
baseName := filepath.Base(path)
55+
56+
for i := 0; i < len(text); i++ {
57+
c := text[i]
58+
59+
if c == '\n' {
60+
currentLine++
61+
}
62+
63+
// Skip over slash-star comments
64+
if !inComment && c == '/' && i+1 < len(text) && text[i+1] == '*' {
65+
inComment = true
66+
i++ // skip '*'
67+
continue
68+
}
69+
if inComment && c == '*' && i+1 < len(text) && text[i+1] == '/' {
70+
inComment = false
71+
i++ // skip '/'
72+
continue
73+
}
74+
if inComment {
75+
continue
76+
}
77+
78+
if c == '{' {
79+
if braceDepth == 0 {
80+
contentBuilder.WriteByte(c)
81+
} else {
82+
contentBuilder.WriteByte(c)
83+
}
84+
braceDepth++
85+
continue
86+
}
87+
88+
if c == '}' {
89+
braceDepth--
90+
contentBuilder.WriteByte(c)
91+
92+
if braceDepth == 0 {
93+
selector := strings.TrimSpace(selectorBuilder.String())
94+
if len(selector) > 200 {
95+
selector = selector[:197] + "..."
96+
}
97+
98+
blockContent := strings.TrimSpace(contentBuilder.String())
99+
if len(blockContent) > 8192 {
100+
blockContent = blockContent[:8192] + "\n...[TRUNCATED]"
101+
}
102+
103+
if selector != "" {
104+
symbols = append(symbols, parser.Symbol{
105+
Name: baseName,
106+
Type: "style_rule",
107+
FilePath: path,
108+
Language: "css",
109+
Content: selector + " " + blockContent,
110+
Signature: selector,
111+
StartLine: startLine,
112+
EndLine: currentLine,
113+
IsPublic: true,
114+
Metadata: map[string]interface{}{
115+
"selector": selector,
116+
},
117+
})
118+
}
119+
120+
selectorBuilder.Reset()
121+
contentBuilder.Reset()
122+
startLine = currentLine
123+
}
124+
continue
125+
}
126+
127+
if braceDepth == 0 {
128+
selectorBuilder.WriteByte(c)
129+
} else {
130+
contentBuilder.WriteByte(c)
131+
}
132+
}
133+
134+
// Any leftover content (like variables at the root without braces)
135+
leftover := strings.TrimSpace(selectorBuilder.String())
136+
if leftover != "" && len(symbols) == 0 {
137+
if len(leftover) > 8192 {
138+
leftover = leftover[:8192] + "\n...[TRUNCATED]"
139+
}
140+
symbols = append(symbols, parser.Symbol{
141+
Name: baseName,
142+
Type: "style_rule",
143+
FilePath: path,
144+
Language: "css",
145+
Content: leftover,
146+
Signature: "global",
147+
StartLine: startLine,
148+
EndLine: currentLine,
149+
IsPublic: true,
150+
})
151+
}
152+
153+
return &parser.Result{
154+
Symbols: symbols,
155+
Language: "css",
156+
}, nil
157+
}

pkg/parser/html/analyzer.go

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -99,88 +99,7 @@ func (a *Analyzer) Analyze(ctx context.Context, path string) (*pkgParser.Result,
9999
}, nil
100100
}
101101

102-
// analyzeCSS parses CSS/SCSS/LESS files using tree-sitter.
103-
func (a *Analyzer) analyzeCSS(path string) (*pkgParser.Result, error) {
104-
content, err := os.ReadFile(path)
105-
if err != nil {
106-
return nil, fmt.Errorf("css read %s: %w", path, err)
107-
}
108-
109-
text := strings.TrimSpace(string(content))
110-
if text == "" {
111-
return &pkgParser.Result{Language: "html"}, nil
112-
}
113-
114-
// Use tree-sitter for proper CSS/SCSS parsing
115-
langInfo := grammars.DetectLanguage(path)
116-
if langInfo == nil {
117-
// Fallback: if tree-sitter doesn't recognize the extension, skip
118-
return &pkgParser.Result{Language: "html"}, nil
119-
}
120-
121-
langObj := langInfo.Language()
122-
tsParser := a.getOrCreateParser(langInfo)
123-
tree, err := tsParser.Parse(content)
124-
if err != nil {
125-
return nil, fmt.Errorf("css treesitter parse %s: %w", path, err)
126-
}
127-
defer tree.Release()
128-
129-
baseName := filepath.Base(path)
130-
langName := langInfo.Name
131-
var symbols []pkgParser.Symbol
132-
root := tree.RootNode()
133-
134-
// Walk tree-sitter nodes, extracting rule blocks as symbols
135-
for i := 0; i < root.ChildCount(); i++ {
136-
node := root.Child(i)
137-
nodeType := node.Type(langObj)
138-
nodeText := strings.TrimSpace(node.Text(content))
139102

140-
if len(nodeText) < 5 {
141-
continue
142-
}
143-
144-
// Truncate very large blocks
145-
if len(nodeText) > 4096 {
146-
nodeText = nodeText[:4096] + "\n...[TRUNCATED]"
147-
}
148-
149-
startLine := int(node.StartPoint().Row) + 1
150-
endLine := int(node.EndPoint().Row) + 1
151-
152-
// Extract selector from CSS rule nodes
153-
selector := nodeType
154-
if node.ChildCount() > 0 {
155-
firstChild := node.Child(0)
156-
firstChildText := strings.TrimSpace(firstChild.Text(content))
157-
if firstChildText != "" && len(firstChildText) < 200 {
158-
selector = firstChildText
159-
}
160-
}
161-
162-
symbols = append(symbols, pkgParser.Symbol{
163-
Name: baseName,
164-
Type: "style_rule",
165-
FilePath: path,
166-
Language: langName,
167-
Content: nodeText,
168-
Signature: selector,
169-
StartLine: startLine,
170-
EndLine: endLine,
171-
IsPublic: true,
172-
Metadata: map[string]interface{}{
173-
"selector": selector,
174-
"node_type": nodeType,
175-
},
176-
})
177-
}
178-
179-
return &pkgParser.Result{
180-
Symbols: symbols,
181-
Language: "html",
182-
}, nil
183-
}
184103

185104
// CodeAnalyzer handles the heavy lifting of HTML analysis.
186105
type CodeAnalyzer struct{}

pkg/parser/html/css_regex.go

Lines changed: 0 additions & 90 deletions
This file was deleted.

pkg/parser/parser_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func TestRegistry(t *testing.T) {
4141
t.Run("GetByFile", func(t *testing.T) {
4242
assert.Equal(t, a1, GetByFile("test.go"))
4343
assert.Equal(t, a2, GetByFile("main.py"))
44-
assert.Nil(t, GetByFile("style.css"))
44+
assert.Nil(t, GetByFile("style.unknown"))
4545
})
4646
}
4747

0 commit comments

Comments
 (0)