Skip to content

Commit 92fe124

Browse files
authored
Merge branch 'main' into thrift
2 parents 90396ac + a138ec7 commit 92fe124

11 files changed

Lines changed: 259 additions & 63 deletions

File tree

docs/parser-zh.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,9 @@
3131

3232
### LanguageSpec
3333

34-
```
3534
用于在 LSP 符号收集过程中转换为 UniAST 所需信息,并且这些信息非 LSP 通用定义
3635

3736
```go
38-
3937
// Detailed implementation used to collect LSP symbols and transform them into UniAST
4038
type LanguageSpec interface {
4139
// initialize a root workspace, and return all modules [modulename=>abs-path] inside
@@ -50,7 +48,7 @@ type LanguageSpec interface {
5048
ShouldSkip(path string) bool
5149

5250
// FileImports parses the file content to get its imports
53-
FileImports(content []byte) ([]uniast.Import, error)
51+
FileImports(content []byte) ([]uniast.Import, error)
5452

5553
// return the first declaration token of a symbol, as Type-Name
5654
DeclareTokenOfSymbol(sym DocumentSymbol) int

docs/uniast-en.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Universal Abstract-Syntax-Tree Specification (v0.1.2)
1+
# Universal Abstract-Syntax-Tree Specification (v0.1.3)
22

33
Universal Abstract-Syntax-Tree is an LLM-friendly, language-agnostic code context data structure established by ABCoder. It represents a unified abstract syntax tree of a repository's code, collecting definitions of language entities (functions, types, constants/variables) and their interdependencies for subsequent AI understanding and coding-workflow development.
44

@@ -105,6 +105,9 @@ A repository consists of entity Modules and relationship Graph
105105

106106
- Graph: Dependency topology graph of AST Nodes, see [Graph] below
107107

108+
- Path: The file directory of the repository; this should usually be an absolute path
109+
110+
- ASTVersion: The UniAST version used when parsing the repository
108111

109112
### Module
110113

docs/uniast-zh.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Universal Abstract-Syntax-Tree Specification (v0.1.2)
1+
# Universal Abstract-Syntax-Tree Specification (v0.1.3)
22

33
Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的定义及其相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。
44

@@ -105,6 +105,10 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言
105105

106106
- Graph: AST Node 的依赖拓扑图,见下文【Graph】
107107

108+
- Path: 仓库的文件目录,通常应该为绝对路径
109+
110+
- ASTVersion: 解析时使用的 UniAST 版本
111+
108112

109113
### Module
110114

lang/collect/collect.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,11 @@ func (c *Collector) Collect(ctx context.Context) error {
168168

169169
file := c.files[path]
170170
if file == nil {
171-
file = uniast.NewFile(path)
171+
rel, err := filepath.Rel(c.repo, path)
172+
if err != nil {
173+
return err
174+
}
175+
file = uniast.NewFile(rel)
172176
c.files[path] = file
173177
}
174178

@@ -180,8 +184,9 @@ func (c *Collector) Collect(ctx context.Context) error {
180184
uses, err := c.spec.FileImports(content)
181185
if err != nil {
182186
log.Error("parse file %s use statements failed: %v", path, err)
187+
} else {
188+
file.Imports = uses
183189
}
184-
file.Imports = uses
185190

186191
// collect symbols
187192
uri := NewURI(path)
@@ -210,7 +215,7 @@ func (c *Collector) Collect(ctx context.Context) error {
210215
return nil
211216
}
212217
if err := filepath.Walk(c.repo, scanner); err != nil {
213-
return err
218+
log.Error("scan files failed: %v", err)
214219
}
215220

216221
// collect some extra metadata

lang/collect/export.go

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -79,46 +79,34 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) {
7979
_, _ = c.exportSymbol(&repo, symbol, "", visited)
8080
}
8181

82-
// connect file with package on demands
83-
// for p, m := range repo.Modules {
84-
// if p == "" || strings.Contains(p, "@") {
85-
// continue
86-
// }
87-
// for _, f := range m.Files {
88-
// if f.Package != "" {
89-
// continue
90-
// }
91-
// _, pkgpath, err := c.spec.NameSpace(filepath.Join(c.repo, f.Path))
92-
// if err != nil {
93-
// continue
94-
// }
95-
// f.Package = pkgpath
96-
// }
97-
// }
9882
for fp, f := range c.files {
99-
if f.Package != "" {
100-
continue
101-
}
10283
rel, err := filepath.Rel(c.repo, fp)
10384
if err != nil {
10485
continue
10586
}
87+
10688
modpath, pkgpath, err := c.spec.NameSpace(fp)
10789
if err != nil {
10890
continue
10991
}
92+
93+
// connect file to package
11094
if modpath == "" || strings.Contains(modpath, "@") {
11195
continue
11296
}
11397
m, ok := repo.Modules[modpath]
11498
if !ok {
11599
continue
116100
}
101+
102+
m.Files[rel] = f
103+
if pkgpath == "" || f.Package != "" {
104+
continue
105+
}
117106
if _, ok := m.Packages[pkgpath]; !ok {
118107
continue
119108
}
120109
f.Package = pkgpath
121-
m.Files[rel] = f
122110
}
123111

124112
return &repo, nil
@@ -199,25 +187,15 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
199187
repo.Modules[mod] = newModule(mod, "", c.Language)
200188
}
201189
module := repo.Modules[mod]
202-
if repo.Modules[mod].Packages[path] == nil {
203-
repo.Modules[mod].Packages[path] = uniast.NewPackage(path)
190+
if module.Packages[path] == nil {
191+
module.Packages[path] = uniast.NewPackage(path)
204192
}
205193
pkg := repo.Modules[mod].Packages[path]
206194
if c.spec.IsMainFunction(*symbol) {
207195
pkg.IsMain = true
208196
}
209197

210-
var relfile string
211-
if c.internal(symbol.Location) {
212-
relfile, _ = filepath.Rel(c.repo, file)
213-
} else {
214-
relfile = filepath.Base(file)
215-
}
216198
fileLine := c.fileLine(symbol.Location)
217-
// collect files
218-
if module.Files[relfile] == nil {
219-
module.Files[relfile] = uniast.NewFile(relfile)
220-
}
221199

222200
content := symbol.Text
223201
public := c.spec.IsPublicSymbol(*symbol)

lang/golang/parser/ctx.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,8 @@ func (p *GoParser) collectTypes(ctx *fileContext, typ ast.Expr, st *Type, inline
446446

447447
// gets the type id and tells whether it is std or builtin
448448
func (ctx *fileContext) getTypeinfo(typ types.Type) (ti typeInfo) {
449-
tobjs, isPointer, isNamed := getNamedTypes(typ)
449+
visited := make(map[types.Type]bool)
450+
tobjs, isPointer, isNamed := getNamedTypes(typ, visited)
450451
ti.IsPointer = isPointer
451452
ti.Ty = typ
452453
ti.IsNamed = isNamed

lang/golang/parser/utils.go

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -183,63 +183,69 @@ func getTypeKind(n ast.Expr) TypeKind {
183183
}
184184
}
185185

186-
func getNamedTypes(typ types.Type) (tys []types.Object, isPointer bool, isNamed bool) {
186+
func getNamedTypes(typ types.Type, visited map[types.Type]bool) (tys []types.Object, isPointer bool, isNamed bool) {
187+
if visited[typ] {
188+
return nil, false, false
189+
}
190+
191+
visited[typ] = true
192+
187193
switch t := typ.(type) {
188194
case *types.Pointer:
189195
isPointer = true
190-
typs, _, isNamed2 := getNamedTypes(t.Elem())
196+
var typs []types.Object
197+
typs, _, isNamed = getNamedTypes(t.Elem(), visited)
191198
tys = append(tys, typs...)
192-
isNamed = isNamed2
193199
case *types.Slice:
194-
typs, _, _ := getNamedTypes(t.Elem())
200+
typs, _, _ := getNamedTypes(t.Elem(), visited)
195201
tys = append(tys, typs...)
196202
case *types.Array:
197-
typs, _, _ := getNamedTypes(t.Elem())
203+
typs, _, _ := getNamedTypes(t.Elem(), visited)
198204
tys = append(tys, typs...)
199205
case *types.Chan:
200-
typs, _, _ := getNamedTypes(t.Elem())
206+
typs, _, _ := getNamedTypes(t.Elem(), visited)
201207
tys = append(tys, typs...)
202208
case *types.Tuple:
203209
for i := 0; i < t.Len(); i++ {
204-
typs, _, _ := getNamedTypes(t.At(i).Type())
210+
typs, _, _ := getNamedTypes(t.At(i).Type(), visited)
205211
tys = append(tys, typs...)
206212
}
207213
case *types.Map:
208-
typs2, _, _ := getNamedTypes(t.Elem())
209-
typs1, _, _ := getNamedTypes(t.Key())
214+
typs2, _, _ := getNamedTypes(t.Elem(), visited)
215+
typs1, _, _ := getNamedTypes(t.Key(), visited)
210216
tys = append(tys, typs1...)
211217
tys = append(tys, typs2...)
212218
case *types.Named:
213219
tys = append(tys, t.Obj())
214220
isNamed = true
215221
case *types.Struct:
216222
for i := 0; i < t.NumFields(); i++ {
217-
typs, _, _ := getNamedTypes(t.Field(i).Type())
223+
typs, _, _ := getNamedTypes(t.Field(i).Type(), visited)
218224
tys = append(tys, typs...)
219225
}
220226
case *types.Interface:
221227
for i := 0; i < t.NumEmbeddeds(); i++ {
222-
typs, _, _ := getNamedTypes(t.EmbeddedType(i))
228+
typs, _, _ := getNamedTypes(t.EmbeddedType(i), visited)
223229
tys = append(tys, typs...)
224230
}
225231
for i := 0; i < t.NumExplicitMethods(); i++ {
226-
typs, _, _ := getNamedTypes(t.ExplicitMethod(i).Type())
232+
typs, _, _ := getNamedTypes(t.ExplicitMethod(i).Type(), visited)
227233
tys = append(tys, typs...)
228234
}
229235
case *types.TypeParam:
230-
typs, _, _ := getNamedTypes(t.Constraint())
236+
typs, _, _ := getNamedTypes(t.Constraint(), visited)
231237
tys = append(tys, typs...)
232238
case *types.Alias:
233239
var typs []types.Object
234-
typs, isPointer, isNamed = getNamedTypes(t.Rhs())
240+
typs, isPointer, isNamed = getNamedTypes(t.Rhs(), visited)
235241
tys = append(tys, typs...)
236242
case *types.Signature:
237243
for i := 0; i < t.Params().Len(); i++ {
238-
typs, _, _ := getNamedTypes(t.Params().At(i).Type())
244+
typs, _, _ := getNamedTypes(t.Params().At(i).Type(), visited)
239245
tys = append(tys, typs...)
240246
}
241247
for i := 0; i < t.Results().Len(); i++ {
242-
typs, _, _ := getNamedTypes(t.Results().At(i).Type())
248+
typs, _, _ := getNamedTypes(t.Results().At(i).Type(), visited)
243249
tys = append(tys, typs...)
244250
}
245251
}

0 commit comments

Comments
 (0)