Skip to content
45 changes: 39 additions & 6 deletions pkg/parser/html/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ import (
"os"
"path/filepath"
"strings"

"github.com/PuerkitoBio/goquery"

pkgParser "github.com/doITmagic/rag-code-mcp/pkg/parser"
"github.com/doITmagic/rag-code-mcp/pkg/parser/html/gotemplate"
)

func init() {
Expand All @@ -33,26 +36,56 @@ func (a *Analyzer) Name() string {
return "html"
}

// CanHandle reports whether filePath has an extension this analyzer
// processes: .html, .htm, .tmpl, or .gohtml. The extension is compared
// case-insensitively, so "page.HTML" is accepted.
func (a *Analyzer) CanHandle(filePath string) bool {
	switch strings.ToLower(filepath.Ext(filePath)) {
	case ".html", ".htm", ".tmpl", ".gohtml":
		return true
	default:
		return false
	}
}

// Analyze extracts symbols (sections) from an HTML file.
// Analyze extracts symbols from an HTML or Go template file.
// For files with {{ }} syntax: uses both GoTemplate and HTML analysis.
// For plain HTML files: uses goquery HTML analysis only.
func (a *Analyzer) Analyze(ctx context.Context, path string) (*pkgParser.Result, error) {
// HTML files: use goquery
var symbols []pkgParser.Symbol

// For single files: detect Go template syntax and run GoTemplate analysis
info, err := os.Stat(path)
if err != nil {
return nil, err
}

if !info.IsDir() {
data, err := os.ReadFile(path)
if err != nil {
return nil, err
}

// If Go template syntax detected, run Go template analysis first
if bytes.Contains(data, []byte("{{")) {
goTplAnalyzer := &gotemplate.GoTemplateAnalyzer{}
templates := goTplAnalyzer.Analyze([]string{path})
symbols = append(symbols, gotemplate.ConvertToSymbols(templates)...)
}
Comment thread
doITmagic marked this conversation as resolved.
}

Comment thread
doITmagic marked this conversation as resolved.
Outdated
// Always run HTML DOM analysis too (Go templates contain HTML)
chunks, err := a.ca.AnalyzePaths([]string{path})
Comment thread
doITmagic marked this conversation as resolved.
if err != nil {
// If HTML parsing fails but we got Go template symbols, return those
if len(symbols) > 0 {
return &pkgParser.Result{
Symbols: symbols,
Language: "html",
}, nil
}
return nil, err
}

var symbols []pkgParser.Symbol
for _, ch := range chunks {
symbols = append(symbols, pkgParser.Symbol{
Name: ch.Name,
Expand Down Expand Up @@ -252,7 +285,7 @@ func (ca *CodeAnalyzer) shouldSkipDir(path, root string) bool {

func (ca *CodeAnalyzer) isHTMLFile(name string) bool {
lower := strings.ToLower(name)
for _, ext := range []string{".html", ".htm"} {
for _, ext := range []string{".html", ".htm", ".tmpl", ".gohtml"} {
if strings.HasSuffix(lower, ext) {
return true
}
Expand Down
71 changes: 71 additions & 0 deletions pkg/parser/html/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,75 @@ func TestHTMLAnalyzer_Comprehensive(t *testing.T) {
assert.NotEqual(t, "Skip", s.Name)
}
})

t.Run("CanHandle_GoTemplateExtensions", func(t *testing.T) {
assert.True(t, analyzer.CanHandle("layout.tmpl"))
assert.True(t, analyzer.CanHandle("partial.gohtml"))
assert.True(t, analyzer.CanHandle("page.HTML"))
})

t.Run("HTML_with_GoTemplate_syntax", func(t *testing.T) {
goTplHTML := `<!DOCTYPE html>
<html>
<head><title>{{ .Title }}</title></head>
<body>
<h1>{{ .PageTitle }}</h1>
{{ range .Items }}
<p>{{ .Name }}</p>
{{ end }}
{{ template "footer" . }}
</body>
</html>`
goTplPath := filepath.Join(tmpDir, "gotpl.html")
require.NoError(t, os.WriteFile(goTplPath, []byte(goTplHTML), 0644))

res, err := analyzer.Analyze(context.Background(), goTplPath)
require.NoError(t, err)

// Should have BOTH Go template symbols AND HTML symbols
assert.Greater(t, len(res.Symbols), 1, "expected both Go template and HTML symbols")

// Verify Go template symbol exists with correct metadata
foundGoTpl := false
for _, s := range res.Symbols {
if md, ok := s.Metadata["template_type"]; ok && md == "go_template" {
foundGoTpl = true
// Should have includes relation to "footer"
foundRel := false
for _, rel := range s.Relations {
if rel.TargetName == "footer" {
foundRel = true
}
}
assert.True(t, foundRel, "expected relation to 'footer' template")
}
}
assert.True(t, foundGoTpl, "expected go_template symbol")
})

t.Run("Tmpl_file", func(t *testing.T) {
tmplContent := `{{ define "sidebar" }}
<aside>
{{ range .Widgets }}
<div>{{ .Content }}</div>
{{ end }}
</aside>
{{ end }}`
tmplPath := filepath.Join(tmpDir, "sidebar.tmpl")
require.NoError(t, os.WriteFile(tmplPath, []byte(tmplContent), 0644))

res, err := analyzer.Analyze(context.Background(), tmplPath)
require.NoError(t, err)

// Should produce Go template symbols
assert.Greater(t, len(res.Symbols), 0)

foundDefine := false
for _, s := range res.Symbols {
if md, ok := s.Metadata["define_name"]; ok && md == "sidebar" {
foundDefine = true
}
}
assert.True(t, foundDefine, "expected define 'sidebar' symbol")
})
}
181 changes: 181 additions & 0 deletions pkg/parser/html/gotemplate/adapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package gotemplate

import (
"fmt"
"path/filepath"
"strings"

pkgParser "github.com/doITmagic/rag-code-mcp/pkg/parser"
)

// ConvertToSymbols converts parsed GoTemplate results to parser.Symbol entries
// with structural relations (dependency for {{ template }}, inheritance-like for {{ block }}).
Comment thread
doITmagic marked this conversation as resolved.
Outdated
func ConvertToSymbols(templates []GoTemplate) []pkgParser.Symbol {
var symbols []pkgParser.Symbol

for _, tpl := range templates {
baseName := filepath.Base(tpl.FilePath)
nameNoExt := strings.TrimSuffix(baseName, filepath.Ext(baseName))

// If template has {{ define }} blocks, create a symbol per define.
if len(tpl.Defines) > 0 {
for _, def := range tpl.Defines {
sym := buildDefineSymbol(tpl, def)
symbols = append(symbols, sym)
}
}

// Always create a file-level symbol for the whole template.
sym := buildFileSymbol(tpl, nameNoExt)
symbols = append(symbols, sym)
}

return symbols
}

// buildFileSymbol creates the file-level symbol representing an entire
// template file.
//
// The symbol is named nameNoExt, spans line 1 through tpl.TotalLines (at
// least 1), and carries:
//   - Signature: "go_template" followed by the non-empty "defines: ...",
//     "includes: ..." and "blocks: ..." lists, joined with " | ".
//   - Docstring: the non-empty "Variables: ...", "Custom funcs: ..." and
//     "Iterates: ..." lists, joined with " | ".
//   - Relations: one RelDependency per entry in tpl.TemplateIncludes.
//   - Metadata: the same name lists, plus template_type "go_template".
func buildFileSymbol(tpl GoTemplate, nameNoExt string) pkgParser.Symbol {
	// Collect each name list once; the signature, docstring, relations and
	// metadata below all share them.
	defines := extractDefineNames(tpl)
	includes := extractIncludeNames(tpl)
	blocks := extractBlockNames(tpl)
	rangeVars := extractRangeVars(tpl)

	// Signature summary: only non-empty lists are mentioned.
	sigParts := []string{"go_template"}
	if len(defines) > 0 {
		sigParts = append(sigParts, "defines: "+strings.Join(defines, ", "))
	}
	if len(includes) > 0 {
		sigParts = append(sigParts, "includes: "+strings.Join(includes, ", "))
	}
	if len(blocks) > 0 {
		sigParts = append(sigParts, "blocks: "+strings.Join(blocks, ", "))
	}

	// Docstring summary: same rule, empty lists are left out.
	var docParts []string
	if len(tpl.Variables) > 0 {
		docParts = append(docParts, "Variables: "+strings.Join(tpl.Variables, ", "))
	}
	if len(tpl.CustomFuncs) > 0 {
		docParts = append(docParts, "Custom funcs: "+strings.Join(tpl.CustomFuncs, ", "))
	}
	if len(rangeVars) > 0 {
		docParts = append(docParts, "Iterates: "+strings.Join(rangeVars, ", "))
	}

	// Every {{ template }} inclusion becomes a dependency relation.
	var relations []pkgParser.Relation
	for _, name := range includes {
		relations = append(relations, pkgParser.Relation{
			TargetName: name,
			Type:       pkgParser.RelDependency,
		})
	}

	// Keep the span valid even when no line count was recorded.
	endLine := tpl.TotalLines
	if endLine < 1 {
		endLine = 1
	}

	return pkgParser.Symbol{
		Name:      nameNoExt,
		Type:      pkgParser.Type,
		FilePath:  tpl.FilePath,
		Language:  "html",
		StartLine: 1,
		EndLine:   endLine,
		Signature: strings.Join(sigParts, " | "),
		Docstring: strings.Join(docParts, " | "),
		IsPublic:  true,
		Relations: relations,
		Metadata: map[string]any{
			"template_type": "go_template",
			"defines":       defines,
			"includes":      includes,
			"blocks":        blocks,
			"variables":     tpl.Variables,
			"custom_funcs":  tpl.CustomFuncs,
			"ranges":        rangeVars,
		},
	}
}

// buildDefineSymbol creates the symbol for a single {{ define "name" }} block.
//
// The symbol spans def.Line through def.EndLine; an EndLine smaller than Line
// is clamped to Line. Each entry of tpl.TemplateIncludes whose line falls
// inside that span is attached as a RelDependency relation.
func buildDefineSymbol(tpl GoTemplate, def DefineDirective) pkgParser.Symbol {
	first, last := def.Line, def.EndLine
	if last < first {
		last = first
	}

	// Only inclusions located inside this define count as its dependencies.
	var deps []pkgParser.Relation
	for _, include := range tpl.TemplateIncludes {
		if include.Line < first || include.Line > last {
			continue
		}
		deps = append(deps, pkgParser.Relation{
			TargetName: include.Name,
			Type:       pkgParser.RelDependency,
		})
	}

	signature := fmt.Sprintf(`go_template | {{ define "%s" }}`, def.Name)
	metadata := map[string]any{
		"template_type": "go_template_define",
		"define_name":   def.Name,
	}

	return pkgParser.Symbol{
		Name:      def.Name,
		Type:      pkgParser.Type,
		FilePath:  tpl.FilePath,
		Language:  "html",
		StartLine: first,
		EndLine:   last,
		Signature: signature,
		IsPublic:  true,
		Relations: deps,
		Metadata:  metadata,
	}
}

// extractDefineNames returns the Name of every entry in tpl.Defines, in slice
// order. The result is never nil, even when tpl has no defines.
func extractDefineNames(tpl GoTemplate) []string {
	out := make([]string, 0, len(tpl.Defines))
	for _, def := range tpl.Defines {
		out = append(out, def.Name)
	}
	return out
}

// extractIncludeNames returns the Name of every entry in
// tpl.TemplateIncludes, in slice order. The result is never nil, even when
// tpl has no includes.
func extractIncludeNames(tpl GoTemplate) []string {
	out := make([]string, 0, len(tpl.TemplateIncludes))
	for _, include := range tpl.TemplateIncludes {
		out = append(out, include.Name)
	}
	return out
}

// extractBlockNames returns the Name of every entry in tpl.Blocks, in slice
// order. The result is never nil, even when tpl has no blocks.
func extractBlockNames(tpl GoTemplate) []string {
	out := make([]string, 0, len(tpl.Blocks))
	for _, block := range tpl.Blocks {
		out = append(out, block.Name)
	}
	return out
}

// extractRangeVars returns the Variable of every entry in tpl.Ranges, in
// slice order. The result is never nil, even when tpl has no ranges.
func extractRangeVars(tpl GoTemplate) []string {
	out := make([]string, 0, len(tpl.Ranges))
	for _, rng := range tpl.Ranges {
		out = append(out, rng.Variable)
	}
	return out
}
Loading
Loading