From 651208e9da132f03191e873c0acbe20c51118139 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Thu, 29 May 2025 13:43:56 +0800 Subject: [PATCH 1/3] feat:(go) support mock identity for unloaded symbols --- lang/golang/parser/ctx.go | 33 ++++++++++++++++++++++++++++++++- lang/golang/parser/parser.go | 19 ++++++++++--------- lang/uniast/ast.go | 2 +- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/lang/golang/parser/ctx.go b/lang/golang/parser/ctx.go index 0a118002..9302a9e5 100644 --- a/lang/golang/parser/ctx.go +++ b/lang/golang/parser/ctx.go @@ -252,10 +252,41 @@ func (ctx *fileContext) GetTypeId(typ ast.Expr) (x Identity, isPointer bool, isS if tinfo, ok := ctx.pkgTypeInfo.Types[typ]; ok { return ctx.getIdFromType(tinfo.Type) } else { - panic("cannot find type info for " + string(ctx.GetRawContent(typ))) + // NOTICE: for unloaded type, we only mock the type name + fmt.Fprintf(os.Stderr, "cannot find type info for %s\n", ctx.GetRawContent(typ)) + return ctx.mockType(typ) } } +func (ctx *fileContext) mockType(typ ast.Expr) (x Identity, isPointer bool, isStdOrBuiltin bool) { + switch ty := typ.(type) { + case *ast.StarExpr: + id, _, std := ctx.mockType(ty.X) + return id, true, std + case *ast.CallExpr: + // try get func type + id, _, std := ctx.mockType(ty.Fun) + return id, false, std + case *ast.SelectorExpr: + // try get import path + switch xx := ty.X.(type) { + case *ast.Ident: + impt, mod, err := ctx.imports.GetImportPath(xx.Name, "") + if err != nil { + goto fallback + } + return NewIdentity(mod, PkgPath(impt), ty.Sel.Name), false, false + case *ast.SelectorExpr: + // recurse + id, _, std := ctx.mockType(xx) + return NewIdentity(id.ModPath, id.PkgPath, ty.Sel.Name), false, std + } + } + +fallback: + return NewIdentity("UNLOADED", ctx.pkgPath, string(ctx.GetRawContent(typ))), false, true +} + func (ctx *fileContext) collectFields(fields []*ast.Field, m *[]Dependency) { for _, fieldDecl := range fields { id, _, isStdOrBuiltin := 
ctx.GetTypeId(fieldDecl.Type) diff --git a/lang/golang/parser/parser.go b/lang/golang/parser/parser.go index d1865597..9112247c 100644 --- a/lang/golang/parser/parser.go +++ b/lang/golang/parser/parser.go @@ -143,16 +143,16 @@ func (p *GoParser) ParseRepo() (Repository, error) { } func (p *GoParser) ParseModule(mod *Module, dir string) (err error) { - filepath.Walk(dir, func(path string, info fs.FileInfo, e error) error { - // run go mod tidy before parse - cmd := exec.Command("go", "mod", "tidy") - cmd.Dir = dir - buf := bytes.NewBuffer(nil) - cmd.Stderr = buf - if err := cmd.Run(); err != nil { - return fmt.Errorf("run go mod tidy failed in %s: %v", dir, buf.String()) - } + // run go mod tidy before parse + cmd := exec.Command("go", "mod", "tidy") + cmd.Dir = dir + buf := bytes.NewBuffer(nil) + cmd.Stderr = buf + if err := cmd.Run(); err != nil { + fmt.Fprintf(os.Stderr, "run go mod tidy failed in %s: %v\n", dir, buf.String()) + } + filepath.Walk(dir, func(path string, info fs.FileInfo, e error) error { if info != nil && info.IsDir() && filepath.Base(path) == ".git" { return filepath.SkipDir } @@ -163,6 +163,7 @@ func (p *GoParser) ParseModule(mod *Module, dir string) (err error) { mod.Files[rel] = NewFile(rel) return nil }) + return p.loadPackages(mod, dir, "./...") } diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 6a064d8d..1c62aaff 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -279,7 +279,7 @@ type Identity struct { func NewIdentity(mod, pkg, name string) Identity { if mod == "" { - fmt.Fprintf(os.Stderr, "module name cannot be empty: %s.%s\n", pkg, name) + fmt.Fprintf(os.Stderr, "module name cannot be empty: %s#%s\n", pkg, name) // panic(fmt.Sprintf("module name cannot be empty: %s.%s", pkg, name)) } return Identity{ModPath: mod, PkgPath: pkg, Name: name} From ffa66d83c5d5909803a4e4d904a743c9383558a6 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Thu, 29 May 2025 15:26:16 +0800 Subject: [PATCH 2/3] fix: get all deps from go.mod 
--- lang/golang/parser/utils.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lang/golang/parser/utils.go b/lang/golang/parser/utils.go index 204502ea..f27941b6 100644 --- a/lang/golang/parser/utils.go +++ b/lang/golang/parser/utils.go @@ -143,9 +143,9 @@ func parseModuleFile(data []byte) (map[string]string, error) { } modules := make(map[string]string) for _, req := range ast.Require { - if req.Indirect { - continue - } + // if req.Indirect { + // continue + // } modules[req.Mod.Path] = req.Mod.Path + "@" + req.Mod.Version } // replaces From 56d829361915ceb2074edb45ab8700e8686621cb Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Thu, 29 May 2025 21:49:23 +0800 Subject: [PATCH 3/3] opt: support collect deps when get type id --- lang/golang/parser/ctx.go | 174 ++++++++++++++++++++---------------- lang/golang/parser/file.go | 48 +++++----- lang/golang/parser/utils.go | 63 +++++++++++-- lang/uniast/ast.go | 5 +- 4 files changed, 184 insertions(+), 106 deletions(-) diff --git a/lang/golang/parser/ctx.go b/lang/golang/parser/ctx.go index 9302a9e5..57f5a523 100644 --- a/lang/golang/parser/ctx.go +++ b/lang/golang/parser/ctx.go @@ -247,10 +247,17 @@ func GetRawContent(fset *token.FileSet, file []byte, node ast.Node, collectComme // return ctx.bs[ctx.fset.Position(from).Offset:ctx.fset.Position(to).Offset] // } +type typeInfo struct { + Id Identity + IsPointer bool + IsStdOrBuiltin bool + Deps []Identity +} + // FIXME: for complex type like map[XX]YY , we only extract first-meet type here -func (ctx *fileContext) GetTypeId(typ ast.Expr) (x Identity, isPointer bool, isStdOrBuiltin bool) { +func (ctx *fileContext) GetTypeInfo(typ ast.Expr) typeInfo { if tinfo, ok := ctx.pkgTypeInfo.Types[typ]; ok { - return ctx.getIdFromType(tinfo.Type) + return ctx.getTypeinfo(tinfo.Type) } else { // NOTICE: for unloaded type, we only mock the type name fmt.Fprintf(os.Stderr, "cannot find type info for %s\n", ctx.GetRawContent(typ)) @@ -258,15 +265,17 @@ func 
(ctx *fileContext) GetTypeId(typ ast.Expr) (x Identity, isPointer bool, isS } } -func (ctx *fileContext) mockType(typ ast.Expr) (x Identity, isPointer bool, isStdOrBuiltin bool) { +func (ctx *fileContext) mockType(typ ast.Expr) typeInfo { switch ty := typ.(type) { case *ast.StarExpr: - id, _, std := ctx.mockType(ty.X) - return id, true, std + ti := ctx.mockType(ty.X) + ti.IsPointer = true + return ti case *ast.CallExpr: // try get func type - id, _, std := ctx.mockType(ty.Fun) - return id, false, std + ti := ctx.mockType(ty.Fun) + ti.IsPointer = false + return ti case *ast.SelectorExpr: // try get import path switch xx := ty.X.(type) { @@ -275,30 +284,36 @@ func (ctx *fileContext) mockType(typ ast.Expr) (x Identity, isPointer bool, isSt if err != nil { goto fallback } - return NewIdentity(mod, PkgPath(impt), ty.Sel.Name), false, false + return typeInfo{NewIdentity(mod, PkgPath(impt), ty.Sel.Name), false, false, nil} case *ast.SelectorExpr: // recurse - id, _, std := ctx.mockType(xx) - return NewIdentity(id.ModPath, id.PkgPath, ty.Sel.Name), false, std + ti := ctx.mockType(xx) + ti.Id.Name = ty.Sel.Name + ti.IsPointer = false + return ti } } fallback: - return NewIdentity("UNLOADED", ctx.pkgPath, string(ctx.GetRawContent(typ))), false, true + return typeInfo{NewIdentity("UNLOADED", ctx.pkgPath, string(ctx.GetRawContent(typ))), false, true, nil} } func (ctx *fileContext) collectFields(fields []*ast.Field, m *[]Dependency) { for _, fieldDecl := range fields { - id, _, isStdOrBuiltin := ctx.GetTypeId(fieldDecl.Type) - if isStdOrBuiltin || id.PkgPath == "" { - continue + ti := ctx.GetTypeInfo(fieldDecl.Type) + if !ti.IsStdOrBuiltin && ti.Id.ModPath != "" { + *m = InsertDependency(*m, Dependency{ + Identity: ti.Id, + FileLine: ctx.FileLine(fieldDecl), + }) + } + for _, dep := range ti.Deps { + *m = InsertDependency(*m, Dependency{ + Identity: dep, + FileLine: ctx.FileLine(fieldDecl), + }) } - *m = append(*m, Dependency{ - Identity: id, - FileLine: 
ctx.FileLine(fieldDecl), - }) } - return } type importInfo struct { @@ -410,74 +425,81 @@ func getTypeName(fset *token.FileSet, file []byte, typ ast.Expr) (ret []Identity } func (p *GoParser) collectTypes(ctx *fileContext, typ ast.Expr, st *Type, inlined bool) { - id, _, isGoBuiltins := ctx.GetTypeId(typ) - dep := NewDependency(id, ctx.FileLine(typ)) - if isGoBuiltins || id.PkgPath == "" { - return - } - if err := p.referCodes(ctx, &id, p.opts.ReferCodeDepth); err != nil { - fmt.Fprintf(os.Stderr, "failed to get refer code for %s: %v\n", id.Name, err) + ti := ctx.GetTypeInfo(typ) + if !ti.IsStdOrBuiltin && ti.Id.ModPath != "" { + dep := NewDependency(ti.Id, ctx.FileLine(typ)) + if err := p.referCodes(ctx, &ti.Id, p.opts.ReferCodeDepth); err != nil { + fmt.Fprintf(os.Stderr, "failed to get refer code for %s: %v\n", ti.Id, err) + } + if inlined { + st.InlineStruct = InsertDependency(st.InlineStruct, dep) + } else { + st.SubStruct = InsertDependency(st.SubStruct, dep) + } } - if inlined { - st.InlineStruct = append(st.InlineStruct, dep) - } else { - st.SubStruct = append(st.SubStruct, dep) + for _, dep := range ti.Deps { + if err := p.referCodes(ctx, &dep, p.opts.ReferCodeDepth); err != nil { + fmt.Fprintf(os.Stderr, "failed to get refer code for %s: %v\n", dep, err) + } + if inlined { + st.InlineStruct = InsertDependency(st.InlineStruct, NewDependency(dep, ctx.FileLine(typ))) + } else { + st.SubStruct = InsertDependency(st.SubStruct, NewDependency(dep, ctx.FileLine(typ))) + } } } -var compositeTypePrefixs = []string{"[]", "map[", "chan ", "<-chan", "chan<-", "func("} - // get type id and tells if it is std or builtin -func (ctx *fileContext) getIdFromType(typ types.Type) (x Identity, isPointer bool, isStrOrBuiltin bool) { - if tobj, isPointer := getNamedType(typ); tobj != nil { - if isGoBuiltins(tobj.Name()) { - return Identity{Name: tobj.Name()}, isPointer, true - } - name := tobj.Name() - // NOTICE: filter composite type (map[] slice func chan ...) 
- // TODO: support extract sub named type - for _, prefix := range compositeTypePrefixs { - if strings.HasPrefix(name, prefix) { - return Identity{Name: name}, isPointer, true +func (ctx *fileContext) getTypeinfo(typ types.Type) (ti typeInfo) { + tobjs, isPointer := getNamedTypes(typ) + ti.IsPointer = isPointer + if len(tobjs) > 0 { + tobj := tobjs[0] + if tp := tobj.Pkg(); tp != nil { + mod, err := ctx.GetMod(tp.Path()) + if err == errSysImport { + ti.Id = Identity{"", tp.Path(), tobj.Name()} + ti.IsStdOrBuiltin = true + } else if err != nil || mod == "" { + // unloaded type, mark it + ti.Id = Identity{"", tp.Path(), tobj.Name()} + ti.IsStdOrBuiltin = false + } else { + ti.Id = NewIdentity(mod, tp.Path(), tobj.Name()) + ti.IsStdOrBuiltin = false } - } - // get mod and pkg from tobj.Pkg() - tp := tobj.Pkg() - if tp == nil { - return NewIdentity(ctx.module.Name, ctx.pkgPath, name), isPointer, false - } - mod, err := ctx.GetMod(tp.Path()) - if err == errSysImport { - return Identity{Name: name, PkgPath: tp.Path()}, isPointer, true - } else if err != nil { - return Identity{Name: name}, isPointer, false - } - return NewIdentity(mod, tp.Path(), tobj.Name()), isPointer, false - } else { - typStr := typ.String() - isPointer := strings.HasPrefix(typStr, "*") - typStr = strings.TrimPrefix(typStr, "*") - if isGoBuiltins(typStr) { - return Identity{Name: typStr}, isPointer, true - } - for _, prefix := range compositeTypePrefixs { - if strings.HasPrefix(typStr, prefix) { - return Identity{Name: typStr}, isPointer, true + } else { + if isGoBuiltins(tobj.Name()) { + ti.Id = Identity{Name: tobj.Name()} + ti.IsStdOrBuiltin = true + } else { + // unloaded type, mark it + ti.Id = Identity{"", ctx.pkgPath, tobj.Name()} + ti.IsStdOrBuiltin = false } } - if idx := strings.LastIndex(typStr, "."); idx > 0 { - pkg := typStr[:idx] - if isSysPkg(pkg) { - return Identity{Name: typStr[idx+1:], PkgPath: pkg}, isPointer, true + // NOTICE: only extract Named type here + for i := 1; i < 
len(tobjs); i++ { + tobj := tobjs[i] + if isGoBuiltins(tobj.Name()) { + continue } - // FIXME: some types (ex: return type of a func-calling) cannot be found go mod here. - // Ignore empty mod for now. - mod, _ := ctx.GetMod(pkg) - return NewIdentity(mod, pkg, typStr[idx+1:]), isPointer, false - } else { - return NewIdentity(ctx.module.Name, ctx.pkgPath, typStr), isPointer, false + // get mod and pkg from tobj.Pkg() + tp := tobj.Pkg() + if tp == nil { + continue + } + mod, err := ctx.GetMod(tp.Path()) + if err != nil || mod == "" { + continue + } + ti.Deps = append(ti.Deps, NewIdentity(mod, tp.Path(), tobj.Name())) } + } else { + ti.Id = Identity{"", "", typ.String()} + ti.IsStdOrBuiltin = true } + return } func (ctx *fileContext) IsSysImport(alias string) bool { diff --git a/lang/golang/parser/file.go b/lang/golang/parser/file.go index b67a42af..605b857e 100644 --- a/lang/golang/parser/file.go +++ b/lang/golang/parser/file.go @@ -101,13 +101,19 @@ func (p *GoParser) parseVar(ctx *fileContext, vspec *ast.ValueSpec, isConst bool v := p.newVar(ctx.module.Name, ctx.pkgPath, name.Name, isConst) v.FileLine = ctx.FileLine(vspec) if vspec.Type != nil { - id, isPointer, _ := ctx.GetTypeId(vspec.Type) - v.Type = &id - v.IsPointer = isPointer + ti := ctx.GetTypeInfo(vspec.Type) + v.Type = &ti.Id + v.IsPointer = ti.IsPointer + for _, dep := range ti.Deps { + v.Dependencies = InsertDependency(v.Dependencies, NewDependency(dep, ctx.FileLine(vspec.Type))) + } } else if val != nil && !isConst { - id, isPointer, _ := ctx.GetTypeId(*val) - v.Type = &id - v.IsPointer = isPointer + ti := ctx.GetTypeInfo(*val) + v.Type = &ti.Id + v.IsPointer = ti.IsPointer + for _, dep := range ti.Deps { + v.Dependencies = InsertDependency(v.Dependencies, NewDependency(dep, ctx.FileLine(*val))) + } } else { v.Type = typ } @@ -277,19 +283,19 @@ func (p *GoParser) parseSelector(ctx *fileContext, expr *ast.SelectorExpr, infos } // callName := string(ctx.GetRawContent(expr)) // get receiver type name - 
var rname string - rev, _ := getNamedType(sel.Recv()) - if rev == nil { - rname = extractName(sel.Recv().String()) - } else { - rname = rev.Name() - } - id := NewIdentity(mod, pkg, rname+"."+expr.Sel.Name) - dep := NewDependency(id, ctx.FileLine(expr.Sel)) - if err := p.referCodes(ctx, &id, p.opts.ReferCodeDepth); err != nil { - fmt.Fprintf(os.Stderr, "failed to get refer code for %s: %v\n", id.Name, err) + // var rname string + rev := ctx.getTypeinfo(sel.Recv()) + // if rev == nil { + // rname = extractName(sel.Recv().String()) + // } else { + if !rev.IsStdOrBuiltin && rev.Id.ModPath != "" { + id := NewIdentity(mod, pkg, rev.Id.Name+"."+expr.Sel.Name) + dep := NewDependency(id, ctx.FileLine(expr.Sel)) + if err := p.referCodes(ctx, &id, p.opts.ReferCodeDepth); err != nil { + fmt.Fprintf(os.Stderr, "failed to get refer code for %s: %v\n", id.Name, err) + } + *infos.methodCalls = InsertDependency(*infos.methodCalls, dep) } - *infos.methodCalls = InsertDependency(*infos.methodCalls, dep) return false } @@ -308,14 +314,14 @@ func (p *GoParser) parseFunc(ctx *fileContext, funcDecl *ast.FuncDecl) (*Functio isMethod := funcDecl.Recv != nil if isMethod { // TODO: reserve the pointer message? 
- id, isPointer, _ := ctx.GetTypeId(funcDecl.Recv.List[0].Type) + ti := ctx.GetTypeInfo(funcDecl.Recv.List[0].Type) // name := "self" // if len(funcDecl.Recv.List[0].Names) > 0 { // name = funcDecl.Recv.List[0].Names[0].Name // } receiver = &Receiver{ - Type: id, - IsPointer: isPointer, + Type: ti.Id, + IsPointer: ti.IsPointer, // Name: name, } } diff --git a/lang/golang/parser/utils.go b/lang/golang/parser/utils.go index f27941b6..66607ed6 100644 --- a/lang/golang/parser/utils.go +++ b/lang/golang/parser/utils.go @@ -183,15 +183,64 @@ func getTypeKind(n ast.Expr) TypeKind { } } -func getNamedType(typ types.Type) (ty types.Object, isPointer bool) { - if pt, ok := typ.(*types.Pointer); ok { - typ = pt.Elem() +func getNamedTypes(typ types.Type) (tys []types.Object, isPointer bool) { + switch t := typ.(type) { + case *types.Pointer: isPointer = true + typs, _ := getNamedTypes(t.Elem()) + tys = append(tys, typs...) + case *types.Slice: + typs, _ := getNamedTypes(t.Elem()) + tys = append(tys, typs...) + case *types.Array: + typs, _ := getNamedTypes(t.Elem()) + tys = append(tys, typs...) + case *types.Chan: + typs, _ := getNamedTypes(t.Elem()) + tys = append(tys, typs...) + case *types.Tuple: + for i := 0; i < t.Len(); i++ { + typs, _ := getNamedTypes(t.At(i).Type()) + tys = append(tys, typs...) + } + case *types.Map: + typs2, _ := getNamedTypes(t.Elem()) + typs1, _ := getNamedTypes(t.Key()) + tys = append(tys, typs1...) + tys = append(tys, typs2...) + case *types.Named: + tys = append(tys, t.Obj()) + case *types.Struct: + for i := 0; i < t.NumFields(); i++ { + typs, _ := getNamedTypes(t.Field(i).Type()) + tys = append(tys, typs...) + } + case *types.Interface: + for i := 0; i < t.NumEmbeddeds(); i++ { + typs, _ := getNamedTypes(t.EmbeddedType(i)) + tys = append(tys, typs...) + } + for i := 0; i < t.NumExplicitMethods(); i++ { + typs, _ := getNamedTypes(t.ExplicitMethod(i).Type()) + tys = append(tys, typs...) 
+ } + case *types.TypeParam: + typs, _ := getNamedTypes(t.Constraint()) + tys = append(tys, typs...) + case *types.Alias: + typs, _ := getNamedTypes(t.Underlying()) + tys = append(tys, typs...) + case *types.Signature: + for i := 0; i < t.Params().Len(); i++ { + typs, _ := getNamedTypes(t.Params().At(i).Type()) + tys = append(tys, typs...) + } + for i := 0; i < t.Results().Len(); i++ { + typs, _ := getNamedTypes(t.Results().At(i).Type()) + tys = append(tys, typs...) + } } - if name, ok := typ.(*types.Named); ok { - return name.Obj(), isPointer - } - return nil, isPointer + return } func extractName(typ string) string { diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 1c62aaff..4720123a 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -544,8 +544,9 @@ type Var struct { IsPointer bool // if its Type is a pointer type Identity FileLine - Type *Identity `json:",omitempty"` - Content string + Type *Identity `json:",omitempty"` + Content string + Dependencies []Dependency `json:",omitempty"` CompressData *string `json:"compress_data,omitempty"` }