|
// uniast.proto - UniAST universal AST schema.
//
// Defines the unified AST format shared by parsers for multiple languages.
// Currently used by: Go Parser, Rust Parser (via rust-analyzer).

syntax = "proto3";

package uniast;

// Go import path of the generated code.
option go_package = "github.com/cloudwego/abcoder/lang/uniast";
| 10 | + |
// ============ Core Types ============
// A language is represented as a plain string, one of:
// "go", "rust", "cxx", "python", "typescript", "java", "kotlin".
| 13 | + |
// ============ Identity System ============

// Identity uniquely identifies a node in the AST.
//
// String format: {ModPath}?{PkgPath}#{Name}
// Example:       "github.com/user/repo?path/to/pkg#FunctionName"
message Identity {
  // Module id, unique within a repo.
  string mod_path = 1;

  // Path id, unique within a module.
  string pkg_path = 2;

  // Symbol id, unique within a package.
  string name = 3;
}
| 24 | + |
// FileLine describes a source location: a file name plus a line range and a
// byte-offset range inside that file.
message FileLine {
  // File path relative to the repo root.
  string file = 1;

  // Start line number (1-based).
  int32 line = 2;

  // End line number (1-based). Marked as newly added in the original schema.
  int32 end_line = 5;

  // Start byte offset in the file.
  int32 start_offset = 3;

  // End byte offset in the file.
  int32 end_offset = 4;
}
| 33 | + |
// ============ Relation System ============

// RelationKind names the type of relationship between two nodes.
//
// It wraps a string (rather than an enum) for JSON compatibility. Documented
// values: "Dependency", "Implement", "Inherit", "Group".
message RelationKind {
  // The relation kind, as one of the strings listed above.
  string value = 1;
}
| 41 | + |
// Relation represents a relationship between two nodes.
//
// NOTE(review): field number 2 is not used by this message. If it belonged to
// a field that was removed, consider reserving it so it cannot be reused.
message Relation {
  // Kind of relation as a string:
  // "Dependency", "Implement", "Inherit", "Group".
  string kind = 1;

  // Flat identity fields (for JSON compatibility with the Go implementation).
  // NOTE: these names intentionally start with an uppercase character so that
  // serde(camelCase) produces PascalCase keys. Do not rename them to
  // lower_snake_case: that would change generated code and JSON keys.
  string ModPath = 6;
  string PkgPath = 7;
  string Name = 8;

  // Start line offset, relative to the current node's codes.
  int32 line = 3;

  // Information about this relation.
  string desc = 4;

  // Related codes representing this relation.
  string codes = 5;
}
| 54 | + |
// ============ Repository ============

// Repository represents a parsed codebase.
message Repository {
  // Repository name.
  string id = 1;

  // UniAST version.
  string ast_version = 2;

  // abcoder version.
  string tool_version = 3;

  // Absolute path of the repo.
  string path = 4;

  // Repository version info.
  RepoVersion repo_version = 5;

  // Module name => module.
  map<string, Module> modules = 6;

  // Node id => node.
  map<string, Node> graph = 7;

  // [Added] Reverse index: symbol name => files.
  // Speeds up the search_symbol API without needing a separate .idx file.
  // Derived from File.function_names / type_names / var_names, so it carries
  // no information that is not already present in the files.
  map<string, NameLocations> name_to_locations = 8;
}
| 72 | + |
// NameLocations lists every location of one symbol name.
// [Added] Value type of the reverse index name => files.
message NameLocations {
  // Files that contain this name (deduplicated).
  repeated string files = 1;
}
| 78 | + |
// RepoVersion carries version information about the parsed repository.
//
// NOTE: the default proto3 JSON mapping converts field names to
// lowerCamelCase, so commit_hash is emitted as "commitHash" and parse_time as
// "parseTime" -- not as PascalCase.
// NOTE(review): this schema's earlier comments expected the PascalCase keys
// "CommitHash" / "ParseTime". Producing those requires an explicit
// [json_name = "..."] option or a custom serializer; confirm which encoder
// writes the JSON before relying on either spelling.
message RepoVersion {
  // Git commit hash.
  string commit_hash = 1;

  // Parse timestamp, formatted as RFC3339.
  string parse_time = 2;
}
| 86 | + |
// ============ Module ============

// Module represents a single module/package (e.g., a Go module or Rust crate).
message Module {
  // Language of the module:
  // "go", "rust", "cxx", "python", "typescript", "java", "kotlin".
  string language = 1;

  // Module version (e.g., "1.0.0").
  string version = 2;

  // Module name.
  string name = 3;

  // Directory of the module, relative to the repo root.
  string dir = 4;

  // Package import path => package.
  map<string, Package> packages = 5;

  // Module name => module_path@version.
  map<string, string> dependencies = 6;

  // Relative path => file info.
  map<string, File> files = 7;

  // Errors encountered while loading the module.
  repeated string load_errors = 8;

  // Module compress info.
  string compress_data = 9;
}
| 101 | + |
// Node represents a symbol node in the graph.
//
// NOTE: for JSON compatibility with the Go implementation the message uses a
// flat layout:
//   - mod_path, pkg_path, name (a flattened Identity)
//   - type (string: "FUNC", "TYPE", "VAR")
//   - dependencies, references, implements, inherits, groups (relations)
//
// Field numbers 5-9 are not used by this message.
message Node {
  // Flat identity fields (matching Go's embedded Identity).
  string mod_path = 1;
  string pkg_path = 2;
  string name = 3;

  // Node type as a string: "FUNC", "TYPE", "VAR".
  string type = 4;

  // Unified relation fields, one list per relation category.
  repeated Relation dependencies = 10;
  repeated Relation references = 11;
  repeated Relation implements = 12;
  repeated Relation inherits = 13;
  repeated Relation groups = 14;
}
| 124 | + |
// ============ Package ============

// Package groups the functions, types and variables that share one import
// path.
message Package {
  // Flag named is_main; NOTE(review): presumably marks a main/entry package.
  bool is_main = 1;

  // Flag named is_test; NOTE(review): presumably marks a test package.
  bool is_test = 2;

  // Package import path.
  string pkg_path = 3;

  // Function name => function.
  map<string, Function> functions = 4;

  // Type name => type.
  map<string, Type> types = 5;

  // Var name => var.
  map<string, Var> vars = 6;

  // Package compress info.
  string compress_data = 7;
}
| 136 | + |
// ============ File ============

// File represents a source file.
//
// It is a lightweight index: only symbol names are stored here, while the
// full data lives in Package / the graph.
message File {
  // File path relative to the repo root.
  string path = 1;

  // Import statements of the file.
  repeated Import imports = 2;

  // Identity fields (matching the Module/Package hierarchy).
  // Module path.
  string mod_path = 3;
  // Package path.
  string pkg_path = 4;

  // [Added] Lightweight symbol-name index (references only; the full data is
  // stored in Package). Accelerates the get_file_structure API.
  repeated string function_names = 5;
  repeated string type_names = 6;
  repeated string var_names = 7;
}
| 155 | + |
// Import represents a single import/use statement.
message Import {
  // Optional alias given to the import.
  string alias = 1;

  // Raw import path.
  string path = 2;
}
| 161 | + |
// ============ Function ============

// Function represents a function or a method.
//
// NOTE: identity and location are stored as flat fields (instead of nested
// Identity / FileLine messages) for JSON compatibility with the Go
// implementation.
message Function {
  // True if the function is exported.
  bool exported = 1;

  // True if it is a method (has a receiver).
  bool is_method = 2;

  // True if it is an interface method stub.
  bool is_interface_method = 3;

  // Flat identity fields (matching Go's embedded Identity).
  string mod_path = 4;
  string pkg_path = 5;
  string name = 6;

  // Flat FileLine fields.
  string file = 7;
  int32 line = 8;

  // Full source code of the function.
  string content = 9;

  // Function signature.
  string signature = 10;

  // Method receiver (set when is_method is true).
  Receiver receiver = 11;

  // Function parameters.
  repeated Relation params = 12;

  // Function return values.
  repeated Relation results = 13;
}
| 187 | + |
// Receiver represents the receiver of a method.
message Receiver {
  // Flag named is_pointer; NOTE(review): presumably set for pointer receivers.
  bool is_pointer = 1;

  // Identity of the receiver type.
  Identity type = 2;
}
| 193 | + |
// ============ Type ============

// Type represents a struct, enum, trait, or interface.
message Type {
  // Field 9 used to be `repeated Identity implements = 9;` (legacy; use the
  // implements relation instead). The number and name are reserved so that
  // they can never be reused with a different meaning.
  reserved 9;
  reserved "implements";

  // True if the type is exported.
  bool exported = 1;

  // Kind of type: struct, interface, enum, typedef.
  TypeKind type_kind = 2;

  // Unique identity of the type.
  Identity identity = 3;

  // Location in source.
  FileLine file_line = 4;

  // Source of the type declaration.
  string content = 5;

  // For structs: field types.
  repeated Relation sub_struct = 6;

  // For structs: inherited/anonymous fields.
  repeated Relation inline_struct = 7;

  // Methods defined on this type.
  map<string, Identity> methods = 8;

  // LLM compress result.
  string compress_data = 10;
}
| 217 | + |
// TypeKind represents the kind of a Type.
//
// NOTE: STRUCT is the zero value, so in proto3 a Type whose type_kind was
// never set is indistinguishable from one explicitly set to STRUCT. The value
// names and numbers are part of the published contract and must not be
// renamed or renumbered.
enum TypeKind {
  // Struct type (also the default when the field is unset).
  STRUCT = 0;

  // Interface type.
  INTERFACE = 1;

  // Typedef.
  TYPEDEF = 2;

  // Enum type.
  ENUM = 3;
}
| 225 | + |
// ============ Variable ============

// Var represents a variable or constant.
message Var {
  // Field 8 used to be `repeated Dependency dependencies = 8;` (legacy; use
  // relations instead). The number and name are reserved so that they can
  // never be reused with a different meaning.
  reserved 8;
  reserved "dependencies";

  // True if the variable is exported.
  bool exported = 1;

  // True if it is a constant.
  bool is_const = 2;

  // True if the type is a pointer.
  bool is_pointer = 3;

  // Unique identity of the variable.
  Identity identity = 4;

  // Location in source.
  FileLine file_line = 5;

  // Type of the variable.
  Identity type = 6;

  // Source of the variable declaration.
  string content = 7;

  // For enum variants or grouped constants.
  repeated Identity groups = 9;

  // LLM compress result.
  string compress_data = 10;
}
| 247 | + |
// ============ Rust-Specific Extensions ============

// Rust only: Trait represents a Rust trait.
message Trait {
  // Embedded Type carrying the common type data.
  Type base = 1;

  // Supertraits of this trait.
  repeated Identity super_traits = 2;

  // Associated items of the trait, keyed by string.
  // NOTE(review): presumably keyed by item name -- confirm against the writer.
  map<string, Identity> associated_items = 3;
}
| 256 | + |
// Rust only: Impl represents an impl block.
message Impl {
  // True if this is a trait impl.
  bool is_trait_impl = 1;

  // Trait being implemented (if any).
  Identity trait = 2;

  // The self type of the impl block.
  Identity self_type = 3;

  // Items contained in the impl block.
  repeated Identity items = 4;
}
| 264 | + |
// Rust only: Macro represents a macro definition.
message Macro {
  // Identity of the macro.
  Identity identity = 1;

  // Location of the macro in source.
  FileLine file_line = 2;

  // Body of the macro.
  string body = 3;
}
0 commit comments