Skip to content

Commit 1f2af21

Browse files
author
simuleite
committed
Update IDL
1 parent 18495a1 commit 1f2af21

1 file changed

Lines changed: 270 additions & 0 deletions

File tree

idl/uniast.proto

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
// uniast.proto - UniAST Universal AST Schema
2+
// This file defines the unified AST format for multiple languages
3+
// Currently used by: Go Parser, Rust Parser (via rust-analyzer)
4+
5+
syntax = "proto3";
6+
7+
package uniast;
8+
9+
option go_package = "github.com/cloudwego/abcoder/lang/uniast";
10+
11+
// ============ Core Types ============
12+
// Language is a simple string: "go", "rust", "cxx", "python", "typescript", "java", "kotlin"
13+
14+
// ============ Identity System ============
15+
16+
// Identity is the unique key of a node in the AST.
// Textual form: {ModPath}?{PkgPath}#{Name}
// e.g. "github.com/user/repo?path/to/pkg#FunctionName"
message Identity {
  // Module id; unique within a repo.
  string mod_path = 1;

  // Path id; unique within a module.
  string pkg_path = 2;

  // Symbol id; unique within a package.
  string name = 3;
}
24+
25+
// FileLine pairs a file name with line numbers and byte offsets.
message FileLine {
  // File path relative to the repo root.
  string file = 1;

  // Start line number (1-based).
  int32 line = 2;

  // End line number (1-based). Newly added field.
  int32 end_line = 5;

  // Start byte offset in the file.
  int32 start_offset = 3;

  // End byte offset in the file.
  int32 end_offset = 4;
}
33+
34+
// ============ Relation System ============
35+
36+
// RelationKind names the type of relationship between nodes.
// It is carried as a string for JSON compatibility; the documented values
// are "Dependency", "Implement", "Inherit" and "Group".
message RelationKind {
  // The relation kind, as one of the strings listed above.
  string value = 1;
}
41+
42+
// Relation describes a relationship between two nodes.
message Relation {
  // Kind of relation, as a string:
  // "Dependency", "Implement", "Inherit" or "Group".
  string kind = 1;

  // Identity of the related node, flattened into this message for JSON
  // compatibility with the Go implementation.
  // NOTE: the first character is deliberately uppercase so that
  // serde(camelCase) produces PascalCase keys.
  string ModPath = 6;
  string PkgPath = 7;
  string Name = 8;

  // Start line offset, relative to the current node's codes.
  int32 line = 3;

  // Information about this relation.
  string desc = 4;

  // Related codes representing this relation.
  string codes = 5;
}
54+
55+
// ============ Repository ============
56+
57+
// Repository is the top-level message for one parsed codebase.
message Repository {
  // Repository name.
  string id = 1;

  // UniAST version.
  string ast_version = 2;

  // abcoder version.
  string tool_version = 3;

  // Absolute path of the repo.
  string path = 4;

  // Repository version info.
  RepoVersion repo_version = 5;

  // Module name => module.
  map<string, Module> modules = 6;

  // Node id => node.
  map<string, Node> graph = 7;

  // [New] Reverse index from symbol name to the files containing it.
  // Speeds up the search_symbol API without a separate .idx file.
  // Derived from File.function_names / type_names / var_names, so it is
  // not redundant data.
  map<string, NameLocations> name_to_locations = 8;
}
72+
73+
// NameLocations lists every location of one symbol name.
// [New] Value type of the name => files reverse index.
message NameLocations {
  // Files that contain the name (deduplicated).
  repeated string files = 1;
}
78+
79+
// RepoVersion holds version information about the parsed repository.
//
// NOTE: the canonical proto3 JSON mapping converts field names to
// lowerCamelCase, so by default commit_hash -> commitHash and
// parse_time -> parseTime. The PascalCase keys this schema intends
// (CommitHash, ParseTime) are only produced when the JSON layer renames
// them, or when an explicit json_name option is set on the fields.
// NOTE(review): confirm that the serializer in use applies that renaming.
message RepoVersion {
  // Git commit hash (intended JSON key: CommitHash).
  string commit_hash = 1;

  // Parse timestamp in RFC3339 format (intended JSON key: ParseTime).
  string parse_time = 2;
}
86+
87+
// ============ Module ============
88+
89+
// Module is a single module/package unit (e.g. a Go module or a Rust crate).
message Module {
  // Language, one of:
  // "go", "rust", "cxx", "python", "typescript", "java", "kotlin".
  string language = 1;

  // Module version (e.g. "1.0.0").
  string version = 2;

  // Module name.
  string name = 3;

  // Path relative to the repo root.
  string dir = 4;

  // Package import path => package.
  map<string, Package> packages = 5;

  // Module name => module_path@version.
  map<string, string> dependencies = 6;

  // Relative path => file info.
  map<string, File> files = 7;

  // Loading errors.
  repeated string load_errors = 8;

  // Module compress info.
  string compress_data = 9;
}
101+
102+
// Node is one symbol node in the graph.
// NOTE: for JSON compatibility with the Go implementation the layout is flat:
//   - mod_path, pkg_path, name (a flattened Identity)
//   - type (string: "FUNC", "TYPE", "VAR")
//   - dependencies, references, implements, inherits, groups (relations)
message Node {
  // Flattened identity, matching Go's embedded Identity.
  string mod_path = 1;
  string pkg_path = 2;
  string name = 3;

  // Node type as a string: "FUNC", "TYPE" or "VAR".
  string type = 4;

  // Unified relation lists, one per relation category.
  repeated Relation dependencies = 10;
  repeated Relation references = 11;
  repeated Relation implements = 12;
  repeated Relation inherits = 13;
  repeated Relation groups = 14;
}
124+
125+
// ============ Package ============
126+
127+
// Package holds the functions, types and vars stored under one package
// import path.
message Package {
  bool is_main = 1;
  bool is_test = 2;

  // Package import path.
  string pkg_path = 3;

  // Function name => function.
  map<string, Function> functions = 4;

  // Type name => type.
  map<string, Type> types = 5;

  // Var name => var.
  map<string, Var> vars = 6;

  // Package compress info.
  string compress_data = 7;
}
136+
137+
// ============ File ============
138+
139+
// File describes one source file.
// It is a lightweight index: only names are stored here, while the full
// data lives in Package / the graph.
message File {
  // Path relative to the repo root.
  string path = 1;

  // Import statements.
  repeated Import imports = 2;

  // Identity fields, matching the Module/Package hierarchy.
  // Module path.
  string mod_path = 3;
  // Package path.
  string pkg_path = 4;

  // Lightweight symbol-name index (references only; full data in Package).
  // [New] Accelerates the get_file_structure API.
  repeated string function_names = 5;
  repeated string type_names = 6;
  repeated string var_names = 7;
}
155+
156+
// Import is a single import/use statement.
message Import {
  // Optional alias.
  string alias = 1;

  // Raw import path.
  string path = 2;
}
161+
162+
// ============ Function ============
163+
164+
// Function describes a function or a method.
// NOTE: fields are flattened for JSON compatibility with the Go
// implementation.
message Function {
  // Whether the function is exported.
  bool exported = 1;

  // Whether it is a method (has a receiver).
  bool is_method = 2;

  // Whether it is an interface method stub.
  bool is_interface_method = 3;

  // Flattened identity, matching Go's embedded Identity.
  string mod_path = 4;
  string pkg_path = 5;
  string name = 6;

  // Flattened FileLine fields.
  string file = 7;
  int32 line = 8;

  // Full function source code.
  string content = 9;

  // Function signature.
  string signature = 10;

  // Method receiver (if is_method).
  Receiver receiver = 11;

  // Function parameters.
  repeated Relation params = 12;

  // Function return values.
  repeated Relation results = 13;
}
187+
188+
// Receiver describes the receiver of a method.
message Receiver {
  bool is_pointer = 1;

  // Identity of the receiver type.
  Identity type = 2;
}
193+
194+
// ============ Type ============
195+
196+
// Type represents a struct, enum, trait, or interface.
message Type {
  // Legacy field `repeated Identity implements = 9` is retired; use the
  // implements relation instead. Its tag and name are reserved so they
  // cannot be reused with a different meaning.
  reserved 9;
  reserved "implements";

  // Whether the type is exported.
  bool exported = 1;

  // Kind of type: struct, interface, enum, typedef.
  TypeKind type_kind = 2;

  // Unique identity.
  Identity identity = 3;

  // Location in source.
  FileLine file_line = 4;

  // Type declaration source.
  string content = 5;

  // For structs: field types.
  repeated Relation sub_struct = 6;

  // For structs: inherited/anonymous fields.
  repeated Relation inline_struct = 7;

  // Methods defined on this type.
  map<string, Identity> methods = 8;

  // LLM compress result.
  string compress_data = 10;
}
217+
218+
// TypeKind enumerates the kinds of type a Type message can describe.
// NOTE: STRUCT is the zero value, so a Type whose type_kind is unset reads
// back as STRUCT. Values are intentionally left unrenamed and unrenumbered
// to keep existing generated code and serialized data valid.
enum TypeKind {
  STRUCT = 0;
  INTERFACE = 1;
  TYPEDEF = 2;
  ENUM = 3;
}
225+
226+
// ============ Variable ============
227+
228+
// Var represents a variable or constant.
message Var {
  // Legacy field `repeated Dependency dependencies = 8` is retired; use
  // relations instead. Its tag and name are reserved so they cannot be
  // reused with a different meaning.
  reserved 8;
  reserved "dependencies";

  // Whether the variable is exported.
  bool exported = 1;

  // Whether it is a constant.
  bool is_const = 2;

  // Whether the type is a pointer.
  bool is_pointer = 3;

  // Unique identity.
  Identity identity = 4;

  // Location in source.
  FileLine file_line = 5;

  // Type of the variable.
  Identity type = 6;

  // Variable declaration source.
  string content = 7;

  // For enum variants or grouped constants.
  repeated Identity groups = 9;

  // LLM compress result.
  string compress_data = 10;
}
247+
248+
// ============ Rust-Specific Extensions ============
249+
250+
// Rust only: Trait describes a Rust trait.
message Trait {
  // Embedded Type.
  Type base = 1;

  // Supertraits.
  repeated Identity super_traits = 2;

  // Associated items.
  map<string, Identity> associated_items = 3;
}
256+
257+
// Rust only: Impl describes an impl block.
message Impl {
  // Whether this is a trait impl.
  bool is_trait_impl = 1;

  // Trait being implemented (if any).
  Identity trait = 2;

  // Self type.
  Identity self_type = 3;

  // Items in the impl block.
  repeated Identity items = 4;
}
264+
265+
// Rust only: Macro describes a macro definition.
message Macro {
  Identity identity = 1;
  FileLine file_line = 2;

  // Macro body.
  string body = 3;
}

0 commit comments

Comments
 (0)