diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
deleted file mode 100644
index 8d0ecea0..00000000
--- a/.github/workflows/rust.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Rust
-
-on:
- push:
- branches: [ "main" ]
- pull_request:
- branches: [ "main" ]
-
-env:
- CARGO_TERM_COLOR: always
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v3
- - name: Build
- run: cargo build --verbose
- # - name: Run tests
- # run: cargo test --verbose
diff --git a/.github/workflows/simple_checks.yml b/.github/workflows/simple_checks.yml
deleted file mode 100644
index 9fddb195..00000000
--- a/.github/workflows/simple_checks.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: Simple Checks
-
-on:
- push:
- branches: [ "main" ]
- pull_request:
- branches: [ "main" ]
-
-jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - name: prepare rls
- run: rustup component add rust-analyzer
- - name: check linenos
- run: ./script/check_all_linenos.sh
-
diff --git a/.gitignore b/.gitignore
index 14fbf546..40f7ab82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -74,3 +74,4 @@ src/lang/testdata
*.json
tools
+abcoder
diff --git a/Cargo.toml b/Cargo.toml
deleted file mode 100644
index c3dfd710..00000000
--- a/Cargo.toml
+++ /dev/null
@@ -1,28 +0,0 @@
-[package]
-name = "ABCoder"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-reqwest = { version = "0.11", features = ["json"] }
-serde = { version = "1.0", features = ["derive", "rc"] }
-tokio = { version = "^1", features = ["full"] }
-serde_json = "1.0"
-serde_yaml = "0.8"
-walkdir = "^2.3"
-regex = "^1.5"
-ryze = { git = "https://github.com/welkeyever/ryze.git" }
-hyper = { version = "0.14", features = ["full"] }
-serde_urlencoded = "0.7.0"
-futures = "0.3"
-async-recursion = "1.0.5"
-csv = "1.1"
-text-splitter = "0.6.3"
-syn = { version = "2.0.59", features = ["full"] }
-quote = "1.0.36"
-rustfmt = "0.10.0"
-lazy_static = "1.4.0"
-dotenv = "0.15"
-paste = "1.0.15"
diff --git a/README.md b/README.md
index 8aaf3bbf..f5b123d9 100644
--- a/README.md
+++ b/README.md
@@ -1,126 +1,64 @@
-
-
# ABCoder: AI-Based Coder(AKA: A Brand-new Coder)

-ABCoder, an AI-powered tool, streamlines coding by keeping real-time status updates, providing lossless code compression, and giving development guidance. It enhances testing by identifying quality, generating reports, and auto-creating test cases. It also offers guidance for refactoring, including language stack switches.
-
-# Table of Contents
-
-- [ABCoder: AI-Based Coder(AKA: A Brand-new Coder)](#abcoder-ai-based-coderaka-a-brand-new-coder)
-- [Table of Contents](#table-of-contents)
-- [Overview](#overview)
-- [Quick Start](#quick-start)
- - [Prerequisites](#prerequisites)
- - [Running through Coze OpenAPI](#running-through-coze-openapi)
-- [Status Update](#status-update)
-- [Lossless Compression](#lossless-compression)
-- [Development Guide](#development-guide)
-- [Testing Enhancements](#testing-enhancements)
-- [Refactor/Rewrite Guide](#refactorrewrite-guide)
-- [Getting Involved](#getting-involved)
-
# Overview
+ABCoder, an AI-oriented code-processing SDK, is designed to enrich the coding context provided to Large Language Models (LLMs) and to speed up the development of AI-assisted coding applications.
-ABCoder is a comprehensive open-source software development tool that aims to utilize artificial intelligence to enhance
-the process of coding. This project focuses on various aspects of software development ranging from repository analysis,
-issue and pull request tracking, to automated code compression, development guidance, testing enhancement, and
-refactoring guidance.
-# Quick Start
+## Features
-## Prerequisites
-- install git and set your access token for github on cmd-line
-- install [rust-toolchain](https://www.rust-lang.org/tools/install) (stable)
-- (optional) install [ollama](https://github.com/ollama/ollama) and run your LLM
-- (optional) create a [Coze](https://www.coze.com/docs/developer_guides/coze_api_overview?_lang=en) agent and set its OpenAPI key
+- Universal Abstract Syntax Tree (UniAST), a language-independent, AI-friendly specification of code information, providing a flexible and structural coding context for both AI and humans.
+
+- General Parser, parses code in arbitrary languages to UniAST.
-## Running through Coze OpenAPI
-1. Set .env file for configuration on ABCoder's working directory. Taking Coze as an example:
-```
-# cache for repo,AST and so on
-WORK_DIR=tmp_abcoder
+- General Writer, transforms UniAST back to code.
+
+- (Coming Soon) General Iterator, a framework for visiting the UniAST easily and implementing batch-code-processing workflows.
-# exclude dirs for repo parsing, separated by comma
-EXCLUDE_DIRS=target,gen-codes
+- (Coming Soon) Code RAG, provides a set of tools and functions to help the LLM understand your code much more deeply than ever.
-# LLM's api type
-API_TYPE=coze # coze|ollama
+Based on these features, developers can easily implement or enhance their AI-assisted-coding applications, such as reviewing, optimizing, translating, etc.
-# LLM's output language
-LANGUAGE=zh
-# Coze options
-COZE_API_TOKEN="{YOUR_COZE_API_TOKEN}"
-COZE_BOT_ID={YOUR_COZE_BOT_ID}
-```
+## Universal-Abstract-Syntax-Tree Specification
-2. compile the parsers
-```
-./script/make_parser.sh
-```
+See [UniAST Specification](docs/uniast-zh.md)
-3. compile and run ABCoder
-```
-cargo run --bin cmd compress https://xxx.git
-```
-4. Once triggered, ABCoder will take three steps:
- 1. Download the repository in {REPO_DIR}
- 2. Parse the repository and store the AST in {CACHE_DIR}
- 3. Call the LLM to compress the repository codes, and refresh the AST for each call.
-You can stop the process at anytime after step 2. You can restart the compressing by running the same command.
+# Getting Started
-5. Export the compressed results
+1. Install ABCoder:
+```bash
+go install github.com/cloudwego/abcoder@latest
```
-cargo run --bin cmd export https://xxx.git --out-dir {OUTPUT_DIR}
+2. Use ABCoder to parse a repository to UniAST (JSON)
+```bash
+abcoder parse {language} {repo-path} > ast.json
+```
+3. Do your magic with UniAST...
+4. Use ABCoder to write a UniAST back to codes
+```bash
+abcoder write {language} ast.json
```
-# Status Update
-
-The system is designed to automatically fetch the latest data from Github upon triggering relevant tasks, ensuring the
-repository status is always up-to-date. It can answer queries related to function, defects based on issue and PR
-information. For more details, check out our Issues and Pull Requests sections on Github.
-
-# Lossless Compression
-
-The system also offers a lossless compression feature for repository code. The specific implementation methods are being
-optimized, and more details will be available soon.
-
-# Development Guide
-We welcome all developers wishing to contribute to ABCoder. Our system provides detailed guidance for manual development
-and also supports auto-generation of instructions. Check out our Contribution Guide for more information.
+# Supported Languages
-# Testing Enhancements
+ABCoder currently supports the following languages:
-The system is designed to analyze existing functions and corresponding tests, identify the overall quality of testing,
-produce reports, and automatically generate test cases for weakly covered items. Our goal is to help repositories
-enhance and perfect their test cases.
+| Language | Parser | Writer |
+| -------- | ----------- | ----------- |
+| Go | ✅ | ✅ |
+| Rust | ✅ | Coming Soon |
+| C | Coming Soon | ❌ |
-# Refactor/Rewrite Guide
-We offer guidance for both small-scale feature iterations and large-scale rewrites, including language stack switches.
-Our system provides a detailed guide for manual development and also supports automated guidance generation.
# Getting Involved
We encourage developers to contribute and make this tool more powerful. If you are interested in contributing to ABCoder
-project, kindly check out our Getting Involved Guide.
+project, kindly check out our Getting Involved Guide:
+- [Parser Extension](docs/parser-zh.md)
> Note: This is a dynamic README and is subject to changes as the project evolves.
diff --git a/docs/parser-zh.md b/docs/parser-zh.md
new file mode 100644
index 00000000..8b37227c
--- /dev/null
+++ b/docs/parser-zh.md
@@ -0,0 +1,102 @@
+# ABCoder - Language Parser 介绍
+
+当前 ABCoder 基于 [LSP](https://microsoft.github.io/language-server-protocol/) 协议实现 Parser ,以达到精确依赖收集,并方便后续多语言扩展
+
+## 代码结构
+
+位于 [lang](/lang) 包下,包括:
+
+- uniast:统一 AST 结构的 golang 定义
+- lsp:LSP 协议处理 client,提供了 文件解析、引用查找、语法树解析、定义查找等接口,以及**通用的语言规范 LanguageSpec 接口**
+- collect:负责基于 LSP 符号收集和导出 UniAST,是核心运算逻辑
+- {language}:主体是对 lsp#Spec 接口的对应 {language} 规范的实现。此外还有具体 LSP server 的一些具体调用逻辑
+
+## 运算过程
+
+
+
+1. 通过命令行参数识别语言启动对应 LSP server,并传入初始化参数
+2. 遍历仓库文件,调用 `textDocument/documentSymbol` 方法获取每个文件的所有符号。对于每个符号
+ 1. 调用 `textDocument/semanticTokens/range` 方法获取符号代码中的 tokens
+ 2. 识别出有效实体的 token,调用 `textDocument/definition` 跳转到对应符号位置,从而建立节点依赖关系
+3. 循环 2 直到文件结束。最终将收集到的 lsp symbols 转换为 UniAST 格式并输出
+
+## 扩展其它语言实现
+
+由于 UniAST 并不完全等价 LSP, 因此需要实现一些特定语言专属的行为接口才能进行转换。参考 lang/rust 包,大体需要实现以下能力:
+
+- GetDefaultLSP():映射用户输入 language 到具体的 lsp.Language,以及对应的 LSP 名称
+- CheckRepo():检查用户仓库情况,根据各语言规范额外处理工具链等问题,并返回默认打开的第一个文件(用于触发 LSP server),以及等候 server 初始化完成的时间(根据仓库大小来决定)
+- **LanguageSpec interface**: 核心模块,用于处理非 LSP 通用的语法信息、比如判断一个 token 是否是标准库的符号、函数签名解析等:
+- ModulePatcher: 后处理模块,用于处理语言特殊的信息收集。比如 rust 的 use 符号收集(LSP 不收集)。可以不实现
+
+### LanguageSpec
+
+
+用于在 LSP 符号收集过程中转换为 UniAST 所需信息,并且这些信息非 LSP 通用定义
+
+```go
+
+// Detailed implementation used for collect LSP symbols and transform them to UniAST
+type LanguageSpec interface {
+ // initialize a root workspace, and return all modules [modulename=>abs-path] inside
+ WorkSpace(root string) (map[string]string, error)
+
+ // give an absolute file path and returns its module name and package path
+ // external path should also be supported
+ // FIXME: some languages (like rust) may have sub-mods inside a file, but we still consider it as a single mod here
+ NameSpace(path string) (string, string, error)
+
+ // tells if a file belongs to language AST
+ ShouldSkip(path string) bool
+
+ // return the first declaration token of a symbol, as Type-Name
+ DeclareTokenOfSymbol(sym DocumentSymbol) int
+
+ // tells if a token is an AST entity
+ IsEntityToken(tok Token) bool
+
+ // tells if a token is a std token
+ IsStdToken(tok Token) bool
+
+ // return the SymbolKind of a token
+ TokenKind(tok Token) SymbolKind
+
+ // tells if a symbol is a main function
+ IsMainFunction(sym DocumentSymbol) bool
+
+ // tells if a symbol is a language symbol (func, type, variable, etc) in workspace
+ IsEntitySymbol(sym DocumentSymbol) bool
+
+ // tells if a symbol is public in workspace
+ IsPublicSymbol(sym DocumentSymbol) bool
+
+ // declare if the language has impl symbol
+ // if it return true, the ImplSymbol() will be called
+ HasImplSymbol() bool
+ // if a symbol is an impl symbol, return the token index of interface type, receiver type and first-method start (-1 means not found)
+ // otherwise the collector will use FunctionSymbol() as receiver type token index (-1 means not found)
+ ImplSymbol(sym DocumentSymbol) (int, int, int)
+
+ // if a symbol is a Function or Method symbol, return the token index of Receiver (-1 means not found),TypeParameters, InputParameters and Outputs
+ FunctionSymbol(sym DocumentSymbol) (int, []int, []int, []int)
+}
+```
+
+- Rust-parser 实现位置:[RustSpec](/lang/rust/spec.go)
+
+
+
+### ModulePatcher
+
+用于后处理收集完成的模块信息
+
+```go
+// ModulePatcher supplements some information for module
+type ModulePatcher interface {
+ // Patch is called after collect all symbols
+ Patch(ast *parse.Module)
+}
+```
+
+- Rust-parser 实现: [RustModulePatcher](/lang/rust/patch.go)
diff --git a/docs/uniast-zh.md b/docs/uniast-zh.md
new file mode 100644
index 00000000..9cd567ab
--- /dev/null
+++ b/docs/uniast-zh.md
@@ -0,0 +1,675 @@
+# Universal Abstract-Syntax-Tree Specification (v0.1.0)
+
+Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种LLM亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的 定义 及其 相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。
+
+
+# Identity 节点唯一标识
+
+为了保证精确查询和可扩展存储,约定 `ModPath?PkgPath#SymbolName` 为 AST Node 的全球唯一标识。
+
+
+```json
+{
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession/backup",
+ "Name": "RecoverCtxOnDemands"
+}
+```
+
+- ModPath: 一个完整的构建单元,ModPath内容为安装路径@版本号。该信息对于LLM并不需要,只是为了保证 Identity 的全球唯一性而保存。它在各个语言中对应不同概念:
+
+ - Golang:对应 module,如 github.com/cloudwego/hertz@v0.1.0
+
+ - Rust:对应 crate,如 [serde_json](https://crates.io/crates/serde_json)@v1.0.114
+
+ 为了方便人工debug,这里有个实现约定:
+
+ - 仓库内部函数或类型(包括**本包和本仓库内**其它子包函数)节点的Module**通常**不带版本号
+
+ - 外部函数(第三方repo引入的函数)的 Module **通常**带上版本号
+
+ - 判断一个 Module 是否为第三方依赖尽量通过 Module.Dir 是否为空来判断,这里不保证
+
+
+- PkgPath:语言中一个独立的命名空间,PkgPath对应语言中一个包的导入路径
+
+ - Golang:对应 package,如 github.com/cloudwego/hertz/pkg/app/server
+
+ - Rust:对应 mod,如 [serde_json](https://crates.io/crates/serde_json)::[value](https://docs.rs/serde_json/1.0.114/serde_json/value/index.html)
+
+ - 提示:这里应该尽量等同于代码文件中的 import (use) 路径,方便 LLM 理解
+
+
+- Name:在包内的唯一符号名
+
+ - 如果节点为 method,**应该**以 `TypeName.MethodName` 来表示。
+
+ - 此外,有些语言(比如 rust)允许一个类型为不同的接口实现同名方法,因此为了避免冲突 TypeName 可进一步扩展为` InterfaceName` 形式
+
+
+- **字符串(Key)形式**
+
+ - Full() 完整形式为 `ModPath?PkgPath#Name`
+
+ - String() Format形式为 `PkgPath#Name`,一般通过该形式展示给 LLM 即可
+
+
+- **每个 AST 都会带有 Identity,但是是以内嵌的形式到具体节点字段中(Name、ModPath、PkgPath 三个字段)**
+
+
+
+
+
+# Go Struct 形式
+- 代码详见 [Repository](/lang/uniast/ast.go) 定义
+
+
+# JSON 形式
+
+以下以 [cloudwego/localsession](https://github.com/cloudwego/localsession.git) 库解析为示例介绍
+
+
+## Repository
+
+一个仓库由 实体Modules 和 关系Graph 组成
+
+
+```json
+{
+ "Identity": "/Users/bytedance/golang/work/abcoder/tmp/localsession",
+ "Modules": {
+ "github.com/bytedance/gopkg@v0.0.0-20230728082804-614d0af6619b": {},
+ "github.com/cloudwego/localsession": {}
+ },
+ "Graph": {}
+}
+```
+
+- Identity: repo 的唯一名称。由于abcoder parser目前不获取仓库git信息,因此一般使用当前所处的绝对路径作为Identity
+
+
+- Modules:包含的子模块,{ModPath} : {Module AST} 的字典,本仓库模块和外部依赖模块都可以出现在Modules中,但是需要通过ModulePath来区分。
+
+ - 本仓库模块 ModPath == Module.Name
+
+ - 外部依赖模块 ModPath == Module.Name@version
+
+
+- Graph:AST Node 的依赖拓扑图,见下文【Graph】
+
+
+### Module
+
+代码独立编译单元,对应Identity中的ModPath,内部包含各个包
+
+
+```json
+{
+ "Name": "github.com/cloudwego/localsession",
+ "Language": "go",
+ "Version": "",
+ "Name": "github.com/cloudwego/localsession",
+ "Dir": ".",
+ "Packages": {
+ "github.com/cloudwego/localsession": {},
+ "github.com/cloudwego/localsession/backup": {}
+ },
+ "Dependencies": {
+ "github.com/bytedance/gopkg": "github.com/bytedance/gopkg@v0.0.0-20230728082804-614d0af6619b"
+ },
+ "Files": {
+ ".github/ISSUE_TEMPLATE/bug_report.md": {},
+ "backup/metainfo.go": {}
+ }
+}
+```
+
+- Name:模块名(不带版本号)
+
+
+- Language:代码使用的语言--对于多语言的仓库,一个模块的语言必须是唯一的。但是一个仓库可以有不同语言的模块。
+
+
+- Dir:**模块与仓库根的相对路径。注意:**
+
+ - **只有本仓库内的模块需要设置且不能为空**
+
+ - **第三方依赖必须为空(当前用于判断是否为第三方依赖)**
+
+
+- Dependencies:模块构建的第三方依赖模块字典 {ModName}:{ModPath}
+
+
+- Packages: 包含的子包,{PkgPath}:{Package AST} 字典
+
+
+- Files:模块文件信息,key为**相对repo的路径。**这里建议包括仓库所有文件,方便writer回写
+
+
+#### File
+
+文件信息,包括代码文件和非代码文件都会记录
+
+
+```json
+
+{
+ "Path": "manager.go",
+ "Imports": [],
+ "Package": "github.com/cloudwego/localsession"
+}
+```
+
+- Path: 文件**相对仓库根的路径**
+
+
+- Imports: 文件中的 import 代码列表,每个元素见下文【Import】
+
+
+##### Import
+
+```json
+{
+ "Alias": "_",
+ "Path": "\"unsafe\""
+}
+```
+
+- Path:导入路径主要用于writer写入代码,具体内容根据各个语言情况而定
+
+ - rust 中为 `use xx::yy;`
+
+ - Golang 中为 `"github.com/cloudwego/abcoder"`
+
+
+- Alias:导入别名,可为空
+
+
+#### Package
+
+一个代码命名空间,对应Identity.PkgPath,内部包含各个AST Node实体
+
+
+```json
+{
+ "IsMain": false,
+ "IsTest": false,
+ "PkgPath": "github.com/cloudwego/localsession/backup",
+ "PkgPath": "github.com/cloudwego/localsession/backup",
+ "Functions": {
+ "BackupCtx": {}
+ },
+ "Types": {},
+ "Vars": {}
+}
+```
+
+- PkgPath:包路径,见【Identity】介绍
+
+
+- IsMain: 是否是二进制包
+
+
+- IsTest: 是否是测试包
+
+
+- Functions:包含函数AST, {FuncName}:{Function AST} 的字典
+
+
+- Types:包含类型AST,{TypeName}:{Type AST}的字典
+
+
+- Vars:包含全局变量/常量, {VarName}:{Variant AST} 的字典
+
+
+##### Function
+
+函数类型的AST Node实体,对应【NodeType】为 FUNC,包括函数、方法、接口函数
+
+
+```json
+{
+ "Exported": true,
+ "IsMethod": true,
+ "IsInterfaceMethod": false,
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "SessionManager.BindSession",
+ "File": "manager.go",
+ "Line": 134,
+ "StartOffset": 3290,
+ "EndOffset": 3573,
+ "Content": "// BindSession binds the session with current goroutine\nfunc (self *SessionManager) BindSession(Identity SessionIdentity, s Session) {\n\tshard := self.shards[uint64(Identity)%uint64(self.opts.ShardNumber)]\n\n\tshard.Store(Identity, s)\n\n\tif self.opts.EnableImplicitlyTransmitAsync {\n\t\ttransmitSessionIdentity(Identity)\n\t}\n}",
+ "Receiver": {
+ "IsPointer": true,
+ "Type": {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "SessionManager"
+ }
+ },
+ "Params": [
+ {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "SessionIdentity",
+ "File": "manager.go",
+ "Line": 134,
+ "StartOffset": 3386,
+ "EndOffset": 3398
+ },
+ {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "Session",
+ "File": "manager.go",
+ "Line": 134,
+ "StartOffset": 3400,
+ "EndOffset": 3409
+ }
+ ],
+ "FunctionCalls": [
+ {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "transmitSessionIdentity",
+ "File": "manager.go",
+ "Line": 140,
+ "StartOffset": 3547,
+ "EndOffset": 3564
+ }
+ ],
+ "MethodCalls": [
+ {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "com/cloudwego/localsession.Store",
+ "File": "manager.go",
+ "Line": 137,
+ "StartOffset": 3485,
+ "EndOffset": 3490
+ }
+ ],
+ "Types": [],
+ "Vars": []
+}
+```
+
+- ModPath: 模块路径,见【Identity】介绍
+
+
+- PkgPath:包路径,见【Identity】介绍
+
+
+- Name:函数名称
+
+ - 如果函数为method,**应该**以 {TypeName}.{Methodname}来表示
+
+
+- File:所在的文件名
+
+
+- Line:**起始位置文件的行号**
+
+
+- StartOffset:代码起始位置**相对文件头的字节偏移量**
+
+
+- EndOffset: 代码结束位置**相对文件头的字节偏移量**
+
+
+- Exported:是否包外可见导出
+
+
+- IsMethod:是否是一个方法
+
+
+- IsInterfaceMethod:是否是接口的方法--这里abcoder parse收集InterfaceMethod为了方便LLM理解,但是实际上write中并不会认为其是一个语言实体
+
+
+- Receiver:如果是方法的话,会有的receiver结构体。
+
+ - IsPointer:是否是指针接受者(可改变对象内容)。这个在某些语言中有比较重要意义,因此保留
+
+ - Type:对应的receiver结构体Identity
+
+
+- Params:入参中关联的类型的Dependency数组(见下文【Dependency】),如果是匿名参数ParamName由ParamTypeName替代
+
+
+- Results:出参中关联的类型Dependency数组, {ResultName}:{Result Type Identity},如果是匿名参数ParamName由ParamTypeName替代
+
+
+- Content:函数完整内容,包括函数签名+`\n`+函数实现代码
+
+
+- FunctionCalls:当前函数中调用的其他函数Dependency数组。按依赖在代码中出现的次序排列(并去重)。元素为对应的 AST 节点 Identity
+
+
+- MethodCalls:当前函数中调用的方法Dependency数组,按依赖在代码中出现的次序排列(并去重)。规则同【FunctionCalls】。
+
+
+- Types: 当前函数内引用的类型,如 `var x TypeX`中的TypeX
+
+
+- Vars:当前函数内引用的全局量,包括变量和常量
+
+
+###### Dependency
+
+表示一个依赖关系,包含依赖节点Id、依赖产生位置等信息,方便LLM准确识别
+
+
+```json
+{
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "transmitSessionIdentity",
+ "File": "manager.go",
+ "Line": 140,
+ "StartOffset": 3547,
+ "EndOffset": 3564
+}
+```
+
+- ModPath: 模块路径,见【Identity】介绍
+
+
+- PkgPath:包路径,见【Identity】介绍
+
+
+- Name: 结构体名称
+
+
+- File:依赖点(不是被依赖节点)token所处的代码文件
+
+
+- Line:依赖点(不是被依赖节点)token所处的代码行
+
+
+- StartOffset:依赖点(不是被依赖节点)token起始位置相对代码文件的偏移
+
+
+- EndOffset:依赖点(不是被依赖节点)token结束位置相对代码文件的偏移
+
+
+##### Type
+
+类型定义,【NodeType】为 TYPE,包括具体语言中的类型定义,如 结构体、枚举、接口、类型别名等
+
+
+```json
+{
+ "Exported": true,
+ "TypeKind": "interface",
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "Session",
+ "File": "session.go",
+ "Line": 25,
+ "StartOffset": 725,
+ "EndOffset": 1027,
+ "Content": "// Session represents a local storage for one session\ntype Session interface {\n\t// IsValid tells if the session is valid at present\n\tIsValid() bool\n\n\t// Get returns value for specific key\n\tGet(key interface{}) interface{}\n\n\t// WithValue sets value for specific key,and return newly effective session\n\tWithValue(key interface{}, val interface{}) Session\n}",
+ "InlineStruct": [
+ {} // dependency
+ ],
+ "Methods": {
+ "Get": {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "Session.Get"
+ },
+ "IsValid": {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "Session.IsValid"
+ },
+ "WithValue": {
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "Session.WithValue"
+ }
+ },
+ "Implements": []
+}
+```
+
+- ModPath: 模块路径,见【Identity】介绍
+
+
+- PkgPath:包路径,见【Identity】介绍
+
+
+- Name: 结构体名称
+
+
+- File:声明所在的文件名
+
+
+- Line:声明所在文件的行号
+
+
+- TypeKind:类型的种类 Kind -- 这里不做统一约束,由具体语言定义
+
+
+- Exported:是否包外可见导出
+
+
+- Content:具体结构体定义,包括类型签名+`\n`+类型具体字段
+
+
+- SubStructs:字段中非嵌套引用的子结构体类型**Dependency**(不包括 go 原始类型),map key为字段名,val为对应类型 AST 节点 Identity
+
+
+- InlineStructs: 字段中嵌套引用的子结构体类型**Dependency**(不包括 go 原始类型),map key为字段名,val对应类型 AST 节点 Identity
+
+ - 原因:在某些语言如 Golang 中嵌套子结构体的 methods 会被继承到父结构体中,因此和一般子结构体区分开,方便回溯该类型拥有的所有 method
+
+
+- Methods:结构体对应的全部方法**Identity**,key为方法名,val为函数Identity。
+
+ - 注意这里不应该包括 InlineStruct 的 methods
+
+
+- Implements:该类型实现了哪些接口**Identity**
+
+
+##### Var
+
+全局量,包括变量和常量,**但是必须是全局**
+
+
+```json
+{
+ "IsExported": false,
+ "IsConst": false,
+ "IsPointer": false,
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "defaultShardCap",
+ "File": "manager.go",
+ "Line": 53,
+ "StartOffset": 1501,
+ "EndOffset": 1521,
+ "Type": {
+ "ModPath": "",
+ "PkgPath": "",
+ "Name": "int"
+ },
+ "Content": "var defaultShardCap int = 10"
+}
+```
+
+- ModPath: 模块路径,见【Identity】介绍
+
+
+- PkgPath:包路径,见【Identity】介绍
+
+
+- Name: 变量名称
+
+
+- File:声明所在的文件名
+
+
+- Line:声明所在文件的行号
+
+
+- IsExported:是否导出
+
+
+- IsConst:是否为常量
+
+
+- Type:其类型对应的Identity(不包括 go 原始类型),go内置类型可以只有name(如 string, uint)
+
+
+- Content:定义代码,如 `var A int = 1 `
+
+
+### Graph
+
+整个仓库的AST Node依赖拓扑图。形式为 Identity => Node 的映射,其中每个 Node 包含对其它节点的依赖关系。基于该拓扑图,可以实现**任意节点上下文的递归获取**。
+
+
+```json
+{
+ "github.com/cloudwego/localsession?github.com/cloudwego/localsession#checkEnvOptions": {},
+ "github.com/bytedance/gopkg@v0.0.0-20230728082804-614d0af6619b?github.com/bytedance/gopkg/cloud/metainfo#CountPersistentValues": {}
+}
+```
+
+其中 key 通过 【Identity 的完整字符串】形式得到
+
+
+#### Node
+
+一个node表示一个独立的语法单元,通常包括代码、位置信息和依赖关系
+
+
+```json
+{
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "checkEnvOptions",
+ "Type": "FUNC",
+ "Dependencies": [
+ {
+ "Kind": "Dependency",
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "SESSION_CONFIG_KEY",
+ "Line": 1
+ }
+ ],
+ "References": [
+ {
+ "Kind": "Reference",
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "InitDefaultManager",
+ "Line": 3
+ }
+ ]
+}
+```
+
+- ModPath: 目标节点模块路径,见【Identity】介绍
+
+
+- PkgPath:目标节点包路径,见【Identity】介绍
+
+
+- Name: 目标节点变量名称
+
+
+- Type: 目标节点类型,见【NodeType】介绍
+
+
+- Dependencies:该节点依赖的其他节点,每个元素对象为Relation
+
+
+- References:依赖该节点的其他节点,每个元素对象为Relation
+
+
+##### NodeType
+
+包括三种类型:
+
+
+```go
+// Node Type
+type NodeType int
+
+const (
+ UNKNOWN NodeType = iota
+ // top Function、 methods
+ FUNC
+ // Struct、TypeAlias、Enum...
+ TYPE
+ // Global Variable or Global Const
+ VAR
+)
+```
+
+- FUNC:函数,包括方法、顶层函数
+
+
+- TYPE:类型定义,包括 struct、类型别名、接口等通用的类型定义
+
+
+- VAR:全局变量或常量(不包括局部变量,因为我们局部变量可以收集到FUNC或TYPE定义中)
+
+
+#### Relation
+
+用于存储两个节点之间的关系。
+
+
+```json
+{
+ "Kind": "Dependency",
+ "ModPath": "github.com/cloudwego/localsession",
+ "PkgPath": "github.com/cloudwego/localsession",
+ "Name": "SESSION_CONFIG_KEY",
+ "Line": 1,
+ "Desc": "",
+ "Codes": ""
+}
+```
+
+- Kind: 关系类型,目前包括 Dependency 和 Reference,分别表示依赖和引用。
+
+
+- ModPath: 模块路径,见【Identity】介绍
+
+
+- PkgPath:包路径,见【Identity】介绍
+
+
+- Name: 变量名称
+
+- Line: 产生关系的位置在主节点代码的相对行号(从0开始)
+
+
+## 完整JSON示例
+
+- https://github.com/cloudwego/localsession
+
+ - 命令:` git clone https://github.com/cloudwego/localsession.git && abcoder parse go ./localsession -load-external-symbol`
+
+ - 输出 [localsession.json](https://huggingface.co/datasets/AsterDY/abcoder/blob/main/repos/localsession.json)
+
+
+- https://github.com/cloudwego/metainfo
+
+ - 命令`git clone https://github.com/cloudwego/metainfo.git && abcoder parse rust ./metainfo -load-external-symbol`
+
+ - 输出 [metainfo.json](https://huggingface.co/datasets/AsterDY/abcoder/blob/main/repos/metainfo.json)
+
+
+# 扩展其它语言 Parser
+
+当前 ABCoder/lang 已经支持通过 LSP 来进行第三方语言解析,但是由于 LSP 对各个语言特性(主要是函数签名和 Import)没有统一规范,因此需要扩展实现一些接口才能适配。详见 [ABCoder-Language Plugin 开发规范](https://bytedance.sg.larkoffice.com/docx/QNZwdYBPjoUUKtxD3XMlstlpglh)
+Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种LLM亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的 定义 及其 相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。
+
diff --git a/src/lang/go.mod b/go.mod
similarity index 50%
rename from src/lang/go.mod
rename to go.mod
index fda87b5e..141256fc 100644
--- a/src/lang/go.mod
+++ b/go.mod
@@ -1,27 +1,19 @@
-module github.com/cloudwego/abcoder/src/lang
+module github.com/cloudwego/abcoder
go 1.23.0
-toolchain go1.24.1
-
require (
github.com/Knetic/govaluate v3.0.0+incompatible
- github.com/cloudwego/abcoder/src/uniast v0.0.0
github.com/davecgh/go-spew v1.1.1
github.com/sourcegraph/go-lsp v0.0.0-20240223163137-f80c5dd31dfd
github.com/sourcegraph/jsonrpc2 v0.2.0
- github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.10.0
golang.org/x/mod v0.24.0
- golang.org/x/tools v0.31.0
+ golang.org/x/tools v0.32.0
)
require (
- github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
- github.com/spf13/pflag v1.0.5 // indirect
- golang.org/x/sync v0.12.0 // indirect
+ golang.org/x/sync v0.13.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
-
-replace github.com/cloudwego/abcoder/src/uniast => ../uniast
diff --git a/src/lang/go.sum b/go.sum
similarity index 66%
rename from src/lang/go.sum
rename to go.sum
index 6c9693e7..4737dbe5 100644
--- a/src/lang/go.sum
+++ b/go.sum
@@ -1,33 +1,25 @@
github.com/Knetic/govaluate v3.0.0+incompatible h1:7o6+MAPhYTCF0+fdvoz1xDedhRb4f6s9Tn1Tt7/WTEg=
github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
-github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM=
github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
-github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
-github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sourcegraph/go-lsp v0.0.0-20240223163137-f80c5dd31dfd h1:Dq5WSzWsP1TbVi10zPWBI5LKEBDg4Y1OhWEph1wr5WQ=
github.com/sourcegraph/go-lsp v0.0.0-20240223163137-f80c5dd31dfd/go.mod h1:SULmZY7YNBsvNiQbrb/BEDdEJ84TGnfyUQxaHt8t8rY=
github.com/sourcegraph/jsonrpc2 v0.2.0 h1:KjN/dC4fP6aN9030MZCJs9WQbTOjWHhrtKVpzzSrr/U=
github.com/sourcegraph/jsonrpc2 v0.2.0/go.mod h1:ZafdZgk/axhT1cvZAPOhw+95nz2I/Ra5qMlU4gTRwIo=
-github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
-github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
-github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
-github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
-golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
-golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
-golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
-golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
+golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
+golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU=
+golang.org/x/tools v0.32.0/go.mod h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/images/lang-parser.png b/images/lang-parser.png
new file mode 100644
index 00000000..e5de62f0
Binary files /dev/null and b/images/lang-parser.png differ
diff --git a/src/lang/collect/collect.go b/lang/collect/collect.go
similarity index 97%
rename from src/lang/collect/collect.go
rename to lang/collect/collect.go
index a7bcb494..c68f8693 100644
--- a/src/lang/collect/collect.go
+++ b/lang/collect/collect.go
@@ -22,18 +22,18 @@ import (
"strings"
"unicode"
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/lsp"
- . "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/rust"
+ "github.com/cloudwego/abcoder/lang/log"
+ . "github.com/cloudwego/abcoder/lang/lsp"
+ "github.com/cloudwego/abcoder/lang/rust"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
type CollectOption struct {
+ Language uniast.Language
LoadExternalSymbol bool
NeedStdSymbol bool
NoNeedComment bool
NeedTest bool
- Language lsp.Language
Excludes []string
}
@@ -75,9 +75,9 @@ type functionInfo struct {
OutputsSorted []dependency `json:"-"`
}
-func switchSpec(l lsp.Language) lsp.LanguageSpec {
+func switchSpec(l uniast.Language) LanguageSpec {
switch l {
- case Rust:
+ case uniast.Rust:
return &rust.RustSpec{}
default:
panic(fmt.Sprintf("unsupported language %s", l))
@@ -94,7 +94,7 @@ func NewCollector(repo string, cli *LSPClient) *Collector {
deps: map[*DocumentSymbol][]dependency{},
vars: map[*DocumentSymbol]dependency{},
}
- if cli.Language == Rust {
+ if cli.Language == uniast.Rust {
ret.modPatcher = &rust.RustModulePatcher{Root: repo}
}
return ret
@@ -522,7 +522,7 @@ func (c *Collector) collectImpl(ctx context.Context, sym *DocumentSymbol, depth
}
var impl string
if fn > 0 && fn < len(sym.Tokens) {
- impl = lsp.ChunkHead(sym.Text, sym.Location.Range.Start, sym.Tokens[fn].Location.Range.Start)
+ impl = ChunkHead(sym.Text, sym.Location.Range.Start, sym.Tokens[fn].Location.Range.Start)
}
if impl == "" || len(impl) < len(sym.Name) {
impl = sym.Name
diff --git a/src/lang/collect/collect_test.go b/lang/collect/collect_test.go
similarity index 95%
rename from src/lang/collect/collect_test.go
rename to lang/collect/collect_test.go
index 5d88ebe7..d2662fb5 100644
--- a/src/lang/collect/collect_test.go
+++ b/lang/collect/collect_test.go
@@ -23,9 +23,9 @@ import (
"testing"
"time"
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/lsp"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
var testroot = "../../../testdata"
diff --git a/src/lang/collect/export.go b/lang/collect/export.go
similarity index 98%
rename from src/lang/collect/export.go
rename to lang/collect/export.go
index e6c41c93..3101d8a6 100644
--- a/src/lang/collect/export.go
+++ b/lang/collect/export.go
@@ -21,10 +21,10 @@ import (
"path/filepath"
"strings"
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/lsp"
- . "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/lsp"
+ . "github.com/cloudwego/abcoder/lang/lsp"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
type dependency struct {
diff --git a/src/lang/golang/parser/ctx.go b/lang/golang/parser/ctx.go
similarity index 99%
rename from src/lang/golang/parser/ctx.go
rename to lang/golang/parser/ctx.go
index 13f59b0c..0a118002 100644
--- a/src/lang/golang/parser/ctx.go
+++ b/lang/golang/parser/ctx.go
@@ -25,8 +25,8 @@ import (
"path/filepath"
"strings"
- "github.com/cloudwego/abcoder/src/lang/uniast"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
"golang.org/x/tools/go/packages"
)
diff --git a/src/lang/golang/parser/file.go b/lang/golang/parser/file.go
similarity index 99%
rename from src/lang/golang/parser/file.go
rename to lang/golang/parser/file.go
index a028f171..1115f6ee 100644
--- a/src/lang/golang/parser/file.go
+++ b/lang/golang/parser/file.go
@@ -23,7 +23,7 @@ import (
"strconv"
"strings"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
)
func (p *GoParser) parseFile(ctx *fileContext, f *ast.File) error {
diff --git a/src/lang/golang/parser/go_ast.go b/lang/golang/parser/go_ast.go
similarity index 98%
rename from src/lang/golang/parser/go_ast.go
rename to lang/golang/parser/go_ast.go
index e717bf24..2eafc6bc 100644
--- a/src/lang/golang/parser/go_ast.go
+++ b/lang/golang/parser/go_ast.go
@@ -22,7 +22,7 @@ import (
"os"
"strings"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
)
var (
diff --git a/src/lang/golang/parser/go_ast_test.go b/lang/golang/parser/go_ast_test.go
similarity index 90%
rename from src/lang/golang/parser/go_ast_test.go
rename to lang/golang/parser/go_ast_test.go
index 0a360906..6907dfc7 100644
--- a/src/lang/golang/parser/go_ast_test.go
+++ b/lang/golang/parser/go_ast_test.go
@@ -19,70 +19,10 @@ import (
"encoding/json"
"testing"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
- "github.com/davecgh/go-spew/spew"
+ . "github.com/cloudwego/abcoder/lang/uniast"
"github.com/stretchr/testify/require"
)
-func Test_goParser_GeMainOnDepends(t *testing.T) {
- type fields struct {
- modName string
- homePageDir string
- opts Options
- }
- tests := []struct {
- name string
- fields fields
- }{
- {
- name: "test",
- fields: fields{
- homePageDir: "../../../../../tmp/cloudwego/kitex",
- opts: Options{
- ReferCodeDepth: 1,
- CollectComment: true,
- },
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- p := newGoParser(tt.fields.modName, tt.fields.homePageDir, tt.fields.opts)
- n, err := p.getNode(NewIdentity("github.com/cloudwego/kitex", "github.com/cloudwego/kitex/pkg/generic", "ParseContent"))
- if err != nil {
- t.Fatal(err)
- }
- if n == nil {
- t.Fatal("nil get node")
- }
- spew.Dump(p)
- pj, err := json.MarshalIndent(n, "", " ")
- if err != nil {
- t.Fatal(err)
- }
- println(string(pj))
- ids, err := p.searchName("main")
- if err != nil {
- t.Log(err.Error())
- }
- if len(ids) == 0 {
- t.Fatal("not found")
- }
- spew.Dump(ids)
- dep, e := p.getNode(Identity{"github.com/cloudwego/kitex", "github.com/cloudwego/kitex/pkg/generic", "BinaryThriftGeneric"})
- if e != nil {
- t.Fatal(e)
- }
- spew.Dump(dep.(*Function).Content)
- var repo = NewRepository(tt.fields.modName)
- for _, id := range ids {
- loadNode(p, id.PkgPath, id.Name, &repo)
- }
- spew.Dump(repo)
- })
- }
-}
-
func TestCases(t *testing.T) {
type fields struct {
mod string
diff --git a/src/lang/golang/parser/option.go b/lang/golang/parser/option.go
similarity index 100%
rename from src/lang/golang/parser/option.go
rename to lang/golang/parser/option.go
diff --git a/src/lang/golang/parser/parser.go b/lang/golang/parser/parser.go
similarity index 99%
rename from src/lang/golang/parser/parser.go
rename to lang/golang/parser/parser.go
index 804ab20e..0cce04cc 100644
--- a/src/lang/golang/parser/parser.go
+++ b/lang/golang/parser/parser.go
@@ -26,7 +26,7 @@ import (
"regexp"
"strings"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
)
//---------------- Golang Parser -----------------
diff --git a/src/lang/golang/parser/pkg.go b/lang/golang/parser/pkg.go
similarity index 99%
rename from src/lang/golang/parser/pkg.go
rename to lang/golang/parser/pkg.go
index 68b395a6..61845902 100644
--- a/src/lang/golang/parser/pkg.go
+++ b/lang/golang/parser/pkg.go
@@ -23,7 +23,7 @@ import (
"path/filepath"
"strings"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
"golang.org/x/tools/go/packages"
)
diff --git a/src/lang/golang/parser/pkg_test.go b/lang/golang/parser/pkg_test.go
similarity index 95%
rename from src/lang/golang/parser/pkg_test.go
rename to lang/golang/parser/pkg_test.go
index 81c35c58..7c9ea858 100644
--- a/src/lang/golang/parser/pkg_test.go
+++ b/lang/golang/parser/pkg_test.go
@@ -24,7 +24,7 @@ import (
"path/filepath"
"testing"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
)
func Test_goParser_ParseRepo(t *testing.T) {
@@ -40,7 +40,7 @@ func Test_goParser_ParseRepo(t *testing.T) {
name: "test",
fields: fields{
modName: "github.com/cloudwego/localsession",
- homePageDir: "../../../../tmp/localsession",
+ homePageDir: "../../../tmp/localsession",
},
},
}
@@ -93,7 +93,7 @@ func Test_goParser_ParseDirs(t *testing.T) {
{
name: "test",
args: args{
- homePageDir: "../../../../testdata/golang",
+ homePageDir: "../../../testdata/golang",
modName: "a.b/c",
pkg: "a.b/c/cmd",
opts: Options{
@@ -166,7 +166,7 @@ func Test_goParser_ParseNode(t *testing.T) {
name: "test",
fields: fields{
modName: "github.com/cloudwego/localsession",
- homePageDir: "../../../../../tmp/localsession",
+ homePageDir: "../../../tmp/localsession",
},
args: args{
pkgPath: "github.com/modern-go/gls",
diff --git a/src/lang/golang/parser/utils.go b/lang/golang/parser/utils.go
similarity index 98%
rename from src/lang/golang/parser/utils.go
rename to lang/golang/parser/utils.go
index 000cdba5..eed93e36 100644
--- a/src/lang/golang/parser/utils.go
+++ b/lang/golang/parser/utils.go
@@ -29,7 +29,7 @@ import (
"strings"
"github.com/Knetic/govaluate"
- . "github.com/cloudwego/abcoder/src/lang/uniast"
+ . "github.com/cloudwego/abcoder/lang/uniast"
"golang.org/x/mod/modfile"
)
@@ -186,7 +186,9 @@ func getTypeKind(n ast.Expr) TypeKind {
func getNamedType(typ types.Type) (ty types.Object, isPointer bool) {
if pt, ok := typ.(*types.Pointer); ok {
typ = pt.Elem()
- } else if name, ok := typ.(*types.Named); ok {
+ isPointer = true
+ }
+ if name, ok := typ.(*types.Named); ok {
return name.Obj(), isPointer
}
return nil, isPointer
diff --git a/src/lang/golang/writer/ast.go b/lang/golang/writer/ast.go
similarity index 97%
rename from src/lang/golang/writer/ast.go
rename to lang/golang/writer/ast.go
index 1db8f93d..9dde1e20 100644
--- a/src/lang/golang/writer/ast.go
+++ b/lang/golang/writer/ast.go
@@ -19,7 +19,7 @@ package writer
import (
"strings"
- "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
func writeImport(sb *strings.Builder, impts []uniast.Import) {
diff --git a/src/lang/golang/writer/write.go b/lang/golang/writer/write.go
similarity index 89%
rename from src/lang/golang/writer/write.go
rename to lang/golang/writer/write.go
index a4fc4483..474f34f3 100644
--- a/src/lang/golang/writer/write.go
+++ b/lang/golang/writer/write.go
@@ -22,13 +22,16 @@ import (
"go/parser"
"go/token"
"os"
+ "os/exec"
"path/filepath"
+ "regexp"
"sort"
"strconv"
"strings"
- "github.com/cloudwego/abcoder/src/lang/uniast"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/utils"
)
var _ uniast.Writer = (*Writer)(nil)
@@ -36,7 +39,7 @@ var _ uniast.Writer = (*Writer)(nil)
type Options struct {
// RepoDir string
// OutDir string
- GoVersion string
+ CompilerPath string
}
type Writer struct {
@@ -55,6 +58,9 @@ type chunk struct {
}
func NewWriter(opts Options) *Writer {
+ if opts.CompilerPath == "" {
+ opts.CompilerPath = "go"
+ }
return &Writer{
Options: opts,
visited: make(map[string]map[string]*fileNode),
@@ -126,12 +132,16 @@ func (w *Writer) WriteModule(repo *uniast.Repository, modPath string, outDir str
}
}
- // go mod
+ // create go mod
var bs strings.Builder
bs.WriteString("module ")
bs.WriteString(mod.Name)
bs.WriteString("\n\ngo ")
- bs.WriteString(w.Options.GoVersion)
+ goVersion, err := w.GetGoVersion()
+ if err != nil {
+ goVersion = "1.21"
+ }
+ bs.WriteString(goVersion)
bs.WriteString("\n\n")
if len(mod.Dependencies) > 0 {
bs.WriteString("require (\n")
@@ -151,9 +161,31 @@ func (w *Writer) WriteModule(repo *uniast.Repository, modPath string, outDir str
return fmt.Errorf("write go.mod failed: %v", err)
}
+ // go mod tidy
+ cmd := exec.Command(w.Options.CompilerPath, "mod", "tidy")
+ cmd.Dir = outdir
+ if err := cmd.Run(); err != nil {
+ log.Error("go mod tidy failed: %v", err)
+ }
return nil
}
+// goVersionRegex captures the numeric version (major.minor with an optional
+// patch part) that directly follows a "go" prefix.
+var goVersionRegex = regexp.MustCompile(`go(\d+\.\d+(\.\d+)?)`)
+
+// GetGoVersion runs "<CompilerPath> version" and returns the version number
+// parsed from the command output.
+//
+// An error is returned when the command cannot be executed or when its output
+// contains nothing matching goVersionRegex.
+func (w *Writer) GetGoVersion() (string, error) {
+	out, err := exec.Command(w.Options.CompilerPath, "version").Output()
+	if err != nil {
+		return "", fmt.Errorf("get go version failed: %v", err)
+	}
+	matches := goVersionRegex.FindStringSubmatch(string(out))
+	if len(matches) == 0 {
+		// err is nil on this path, so report the unparsable output rather
+		// than formatting a nil error into the message.
+		return "", fmt.Errorf("get go version failed: no version found in %q", string(out))
+	}
+	return matches[1], nil
+}
+
func (w *Writer) appendPackage(repo *uniast.Repository, pkg *uniast.Package) error {
for _, v := range pkg.Vars {
n := repo.GetNode(v.Identity)
diff --git a/src/lang/golang/writer/write_test.go b/lang/golang/writer/write_test.go
similarity index 97%
rename from src/lang/golang/writer/write_test.go
rename to lang/golang/writer/write_test.go
index bcae89d3..860d5934 100644
--- a/src/lang/golang/writer/write_test.go
+++ b/lang/golang/writer/write_test.go
@@ -22,7 +22,7 @@ import (
"reflect"
"testing"
- "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
func TestWriter_WriteRepo(t *testing.T) {
@@ -46,7 +46,7 @@ func TestWriter_WriteRepo(t *testing.T) {
name: "test",
fields: fields{
Options: Options{
- GoVersion: "1.18",
+ CompilerPath: "1.18",
},
},
args: args{repo: repo},
@@ -140,7 +140,6 @@ import "fmt"
name: "add",
args: args{
file: &uniast.File{
- Name: "gls.go",
Imports: []uniast.Import{
{
Path: `"runtime"`,
diff --git a/src/lang/log/logger.go b/lang/log/logger.go
similarity index 100%
rename from src/lang/log/logger.go
rename to lang/log/logger.go
diff --git a/src/lang/lsp/client.go b/lang/lsp/client.go
similarity index 96%
rename from src/lang/lsp/client.go
rename to lang/lsp/client.go
index 1ed0f55d..58479fe6 100644
--- a/src/lang/lsp/client.go
+++ b/lang/lsp/client.go
@@ -23,7 +23,8 @@ import (
"os/exec"
"time"
- "github.com/cloudwego/abcoder/src/lang/log"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/uniast"
lsp "github.com/sourcegraph/go-lsp"
"github.com/sourcegraph/jsonrpc2"
)
@@ -39,7 +40,7 @@ type LSPClient struct {
type ClientOptions struct {
Server string
- Language
+ uniast.Language
Verbose bool
}
@@ -50,7 +51,7 @@ func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientO
return nil, err
}
- cli, err := initLSPClient(context.Background(), svr, lsp.DocumentURI(NewURI(repo)), opts.Verbose)
+ cli, err := initLSPClient(context.Background(), svr, NewURI(repo), opts.Verbose)
if err != nil {
return nil, err
}
@@ -110,7 +111,7 @@ type initializeResult struct {
Capabilities interface{} `json:"capabilities,omitempty"`
}
-func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir lsp.DocumentURI, verbose bool) (*LSPClient, error) {
+func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir DocumentURI, verbose bool) (*LSPClient, error) {
h := newLSPHandler()
stream := jsonrpc2.NewBufferedStream(svr, jsonrpc2.VSCodeObjectCodec{})
conn := jsonrpc2.NewConn(ctx, stream, h)
@@ -131,7 +132,7 @@ func initLSPClient(ctx context.Context, svr io.ReadWriteCloser, dir lsp.Document
initParams := initializeParams{
ProcessID: os.Getpid(),
- RootURI: dir,
+ RootURI: lsp.DocumentURI(dir),
Capabilities: cs,
Trace: lsp.Trace(trace),
ClientInfo: lsp.ClientInfo{Name: "vscode"},
diff --git a/src/lang/lsp/client_test.go b/lang/lsp/client_test.go
similarity index 99%
rename from src/lang/lsp/client_test.go
rename to lang/lsp/client_test.go
index ba132670..22b229c2 100644
--- a/src/lang/lsp/client_test.go
+++ b/lang/lsp/client_test.go
@@ -24,7 +24,7 @@ import (
"testing"
"time"
- "github.com/cloudwego/abcoder/src/lang/log"
+ "github.com/cloudwego/abcoder/lang/log"
)
var golangLSP *LSPClient
diff --git a/src/lang/lsp/handler.go b/lang/lsp/handler.go
similarity index 98%
rename from src/lang/lsp/handler.go
rename to lang/lsp/handler.go
index 758fbe5f..ab12a8a9 100644
--- a/src/lang/lsp/handler.go
+++ b/lang/lsp/handler.go
@@ -1,11 +1,11 @@
// Copyright 2025 CloudWeGo Authors
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
-//
+//
// https://www.apache.org/licenses/LICENSE-2.0
-//
+//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -20,7 +20,7 @@ import (
"sync"
"time"
- "github.com/cloudwego/abcoder/src/lang/log"
+ "github.com/cloudwego/abcoder/lang/log"
"github.com/sourcegraph/jsonrpc2"
)
diff --git a/src/lang/lsp/lsp.go b/lang/lsp/lsp.go
similarity index 99%
rename from src/lang/lsp/lsp.go
rename to lang/lsp/lsp.go
index 9a1c3612..21fb843b 100644
--- a/src/lang/lsp/lsp.go
+++ b/lang/lsp/lsp.go
@@ -24,7 +24,7 @@ import (
"sort"
"strings"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ "github.com/cloudwego/abcoder/lang/utils"
"github.com/sourcegraph/go-lsp"
)
diff --git a/src/lang/lsp/lsp_test.go b/lang/lsp/lsp_test.go
similarity index 100%
rename from src/lang/lsp/lsp_test.go
rename to lang/lsp/lsp_test.go
diff --git a/src/lang/lsp/spec.go b/lang/lsp/spec.go
similarity index 88%
rename from src/lang/lsp/spec.go
rename to lang/lsp/spec.go
index ca31fecb..cf2a1ab2 100644
--- a/src/lang/lsp/spec.go
+++ b/lang/lsp/spec.go
@@ -14,26 +14,11 @@
package lsp
-import "github.com/cloudwego/abcoder/src/lang/uniast"
-
-type Language string
-
-const (
- Rust Language = "rust"
- Golang Language = "golang"
+import (
+ "github.com/cloudwego/abcoder/lang/uniast"
)
-func (l Language) String() string {
- switch l {
- case Rust:
- return "rust"
- case Golang:
- return "go"
- default:
- return "unknown"
- }
-}
-
+// LanguageSpec is the language-specific implementation used to collect LSP symbols and transform them to UniAST
type LanguageSpec interface {
// initialize a root workspace, and return all modules [modulename=>abs-path] inside
WorkSpace(root string) (map[string]string, error)
@@ -81,7 +66,8 @@ type LanguageSpec interface {
GetUnloadedSymbol(from Token, define Location) (string, error)
}
-// Patcher is used to patch the AST of a module
+// ModulePatcher supplements some information for module
type ModulePatcher interface {
+ // Patch is called after all symbols are collected
Patch(ast *uniast.Module)
}
diff --git a/src/lang/lsp/utils.go b/lang/lsp/utils.go
similarity index 96%
rename from src/lang/lsp/utils.go
rename to lang/lsp/utils.go
index fc7fc64e..f1178dc1 100644
--- a/src/lang/lsp/utils.go
+++ b/lang/lsp/utils.go
@@ -31,8 +31,8 @@
package lsp
import (
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/utils"
)
func GetDistance(text string, start Position, pos Position) int {
diff --git a/src/lang/lsp/utils_test.go b/lang/lsp/utils_test.go
similarity index 100%
rename from src/lang/lsp/utils_test.go
rename to lang/lsp/utils_test.go
diff --git a/lang/parse.go b/lang/parse.go
new file mode 100644
index 00000000..95f3d77c
--- /dev/null
+++ b/lang/parse.go
@@ -0,0 +1,183 @@
+/**
+ * Copyright 2025 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package lang
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "time"
+
+ "github.com/cloudwego/abcoder/lang/collect"
+ "github.com/cloudwego/abcoder/lang/golang/parser"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/lsp"
+ "github.com/cloudwego/abcoder/lang/rust"
+ "github.com/cloudwego/abcoder/lang/uniast"
+)
+
+// ParseOptions is the options for parsing the repo.
+type ParseOptions struct {
+ // LSP server executable path
+ LSP string
+ // Verbose is forwarded to the LSP client options
+ Verbose bool
+ // CollectOption (embedded) carries the repo Language and the symbol-collection settings
+ collect.CollectOption
+}
+
+// Parse collects the symbols of the repository located at uri and returns the
+// JSON encoding of the resulting uniast.Repository.
+//
+// Steps: make uri absolute, validate the language / LSP settings, check the
+// repository path, start an LSP client when a server path was resolved,
+// collect the symbols and marshal the repository.
+//
+// Any failure of these steps is returned as the error.
+func Parse(ctx context.Context, uri string, args ParseOptions) ([]byte, error) {
+	if !filepath.IsAbs(uri) {
+		abs, err := filepath.Abs(uri)
+		if err != nil {
+			// Do not continue with the empty path filepath.Abs returns on failure.
+			return nil, fmt.Errorf("failed to resolve absolute path of %s: %v", uri, err)
+		}
+		uri = abs
+	}
+	l, lspPath, err := checkLSP(args.Language, args.LSP)
+	if err != nil {
+		return nil, err
+	}
+	openfile, opentime, err := checkRepoPath(uri, l)
+	if err != nil {
+		return nil, err
+	}
+
+	// client stays nil when no language server is needed (empty lspPath).
+	var client *lsp.LSPClient
+	if lspPath != "" {
+		// Initialize the LSP client
+		log.Info("start initialize LSP server %s...\n", lspPath)
+		client, err = lsp.NewLSPClient(uri, openfile, opentime, lsp.ClientOptions{
+			Server:   lspPath,
+			Language: l,
+			Verbose:  args.Verbose,
+		})
+		if err != nil {
+			log.Error("failed to initialize LSP server: %v\n", err)
+			return nil, err
+		}
+		log.Info("end initialize LSP server")
+	}
+
+	repo, err := collectSymbol(ctx, client, uri, args.CollectOption)
+	if err != nil {
+		log.Error("Failed to collect symbols: %v\n", err)
+		return nil, err
+	}
+	log.Info("all symbols collected, start writing to stdout...\n")
+	out, err := json.Marshal(repo)
+	if err != nil {
+		log.Error("Failed to marshal repository: %v\n", err)
+		return nil, err
+	}
+	return out, nil
+}
+
+// checkRepoPath verifies that repoPath is accessible and returns the file to
+// open together with the time to wait for workspace initialization.
+//
+// Only Rust provides these values (through rust.CheckRepo); every other
+// language gets an empty file name and a zero wait.
+func checkRepoPath(repoPath string, language uniast.Language) (openfile string, wait time.Duration, err error) {
+	if _, statErr := os.Stat(repoPath); statErr != nil {
+		if os.IsNotExist(statErr) {
+			return "", 0, fmt.Errorf("repository not found: %s", repoPath)
+		}
+		// Other stat failures (e.g. permission denied) also mean the repo
+		// cannot be read, so surface them instead of silently continuing.
+		return "", 0, fmt.Errorf("cannot access repository %s: %v", repoPath, statErr)
+	}
+	if language == uniast.Rust {
+		// NOTICE: open the Cargo.toml file is required for Rust projects
+		openfile, wait = rust.CheckRepo(repoPath)
+	}
+
+	log.Info("open file '%s' and wait for %d seconds for initialize workspace\n", openfile, wait/time.Second)
+	return openfile, wait, nil
+}
+
+// checkLSP validates the requested language and resolves the language server
+// executable to use.
+//
+// Returns:
+//   - l: the language (uniast.Unknown on error)
+//   - s: the server executable; empty for Golang, rust.GetDefaultLSP's value
+//     for Rust unless lspPath overrides it
+//   - err: non-nil for an unsupported language or a missing executable
+func checkLSP(language uniast.Language, lspPath string) (l uniast.Language, s string, err error) {
+	switch language {
+	case uniast.Rust:
+		l, s = rust.GetDefaultLSP()
+	case uniast.Golang:
+		l = uniast.Golang
+		s = ""
+		if _, err := exec.LookPath("go"); err != nil {
+			if _, err := os.Stat(lspPath); os.IsNotExist(err) {
+				// The message has no formatting verb, so no extra argument is passed.
+				log.Error("Go compiler not found, please make it excutable!\n")
+				return uniast.Unknown, "", err
+			}
+		}
+		return
+	default:
+		return uniast.Unknown, "", fmt.Errorf("unsupported language: %s", language)
+	}
+	// check that the user-provided language server is executable
+	if lspPath != "" {
+		if _, err := exec.LookPath(lspPath); err != nil {
+			if _, err := os.Stat(lspPath); os.IsNotExist(err) {
+				log.Error("Language server %s not found, please make it excutable!\n", lspPath)
+				return uniast.Unknown, "", err
+			}
+		}
+		s = lspPath
+	}
+
+	return
+}
+
+// collectSymbol produces the uniast.Repository for repoPath and builds its
+// node graph.
+//
+// Golang repositories are handled by callGoParser; every other language is
+// collected through a collect.Collector driven by cli and then exported.
+func collectSymbol(ctx context.Context, cli *lsp.LSPClient, repoPath string, opts collect.CollectOption) (repo *uniast.Repository, err error) {
+	switch opts.Language {
+	case uniast.Golang:
+		if repo, err = callGoParser(ctx, repoPath, opts); err != nil {
+			return nil, err
+		}
+	default:
+		c := collect.NewCollector(repoPath, cli)
+		c.CollectOption = opts
+		log.Info("start collecting symbols...\n")
+		if err = c.Collect(ctx); err != nil {
+			return nil, err
+		}
+		log.Info("all symbols collected.\n")
+		log.Info("start exporting symbols...\n")
+		if repo, err = c.Export(ctx); err != nil {
+			return nil, err
+		}
+	}
+
+	if graphErr := repo.BuildGraph(); graphErr != nil {
+		return nil, graphErr
+	}
+	return repo, nil
+}
+
+// callGoParser maps the generic collect options onto parser.Options and
+// parses the repository at repoPath with the Go parser.
+//
+// ReferCodeDepth is set to 1 only when external symbols are requested;
+// comments are collected unless NoNeedComment is set.
+func callGoParser(ctx context.Context, repoPath string, opts collect.CollectOption) (*uniast.Repository, error) {
+	goopts := parser.Options{
+		CollectComment: !opts.NoNeedComment,
+		NeedTest:       opts.NeedTest,
+		Excludes:       opts.Excludes,
+	}
+	if opts.LoadExternalSymbol {
+		goopts.ReferCodeDepth = 1
+	}
+
+	p := parser.NewParser(repoPath, repoPath, goopts)
+	parsed, err := p.ParseRepo()
+	if err != nil {
+		return nil, err
+	}
+	return &parsed, nil
+}
diff --git a/src/lang/patch/lib.go b/lang/patch/lib.go
similarity index 97%
rename from src/lang/patch/lib.go
rename to lang/patch/lib.go
index daf1260b..36a6a5f3 100644
--- a/src/lang/patch/lib.go
+++ b/lang/patch/lib.go
@@ -21,9 +21,9 @@ import (
"path/filepath"
"sort"
- "github.com/cloudwego/abcoder/src/lang/golang/writer"
- "github.com/cloudwego/abcoder/src/lang/uniast"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ "github.com/cloudwego/abcoder/lang/golang/writer"
+ "github.com/cloudwego/abcoder/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/utils"
)
// PatchModule patches the ast Nodes onto module files
diff --git a/src/lang/patch/lib_test.go b/lang/patch/lib_test.go
similarity index 98%
rename from src/lang/patch/lib_test.go
rename to lang/patch/lib_test.go
index 9ea32ea2..920f56be 100644
--- a/src/lang/patch/lib_test.go
+++ b/lang/patch/lib_test.go
@@ -19,7 +19,7 @@ package patch
import (
"testing"
- "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
var root = "../../../tmp"
diff --git a/src/lang/rust/ast.go b/lang/rust/ast.go
similarity index 98%
rename from src/lang/rust/ast.go
rename to lang/rust/ast.go
index 34ded247..038e00d4 100644
--- a/src/lang/rust/ast.go
+++ b/lang/rust/ast.go
@@ -20,9 +20,9 @@ import (
"os"
"strings"
- "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/uniast"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ "github.com/cloudwego/abcoder/lang/lsp"
+ "github.com/cloudwego/abcoder/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/utils"
)
// UseNode represents a module node in the dependency tree
diff --git a/src/lang/rust/ast_test.go b/lang/rust/ast_test.go
similarity index 98%
rename from src/lang/rust/ast_test.go
rename to lang/rust/ast_test.go
index 10fb925e..7f1ba8c3 100644
--- a/src/lang/rust/ast_test.go
+++ b/lang/rust/ast_test.go
@@ -1,11 +1,11 @@
// Copyright 2025 CloudWeGo Authors
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
-//
+//
// https://www.apache.org/licenses/LICENSE-2.0
-//
+//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/src/lang/rust/patch.go b/lang/rust/patch.go
similarity index 94%
rename from src/lang/rust/patch.go
rename to lang/rust/patch.go
index 86701ec4..83ea45db 100644
--- a/src/lang/rust/patch.go
+++ b/lang/rust/patch.go
@@ -18,8 +18,8 @@ import (
"os"
"path/filepath"
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/uniast"
)
type RustModulePatcher struct {
diff --git a/src/lang/rust/repo.go b/lang/rust/repo.go
similarity index 94%
rename from src/lang/rust/repo.go
rename to lang/rust/repo.go
index 5f0344b9..0bf2c041 100644
--- a/src/lang/rust/repo.go
+++ b/lang/rust/repo.go
@@ -1,11 +1,11 @@
// Copyright 2025 CloudWeGo Authors
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
-//
+//
// https://www.apache.org/licenses/LICENSE-2.0
-//
+//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,9 +22,9 @@ import (
"strconv"
"time"
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/utils"
)
const MaxWaitDuration = 5 * time.Minute
@@ -73,8 +73,8 @@ next:
return openfile, wait
}
-func GetDefaultLSP() (lang lsp.Language, name string) {
- return lsp.Rust, "rust-analyzer"
+func GetDefaultLSP() (lang uniast.Language, name string) {
+ return uniast.Rust, "rust-analyzer"
}
func GetLastCommitTime(repo string) time.Time {
diff --git a/src/lang/rust/repo_test.go b/lang/rust/repo_test.go
similarity index 96%
rename from src/lang/rust/repo_test.go
rename to lang/rust/repo_test.go
index 3e40bc0b..3b6dc336 100644
--- a/src/lang/rust/repo_test.go
+++ b/lang/rust/repo_test.go
@@ -1,11 +1,11 @@
// Copyright 2025 CloudWeGo Authors
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
-//
+//
// https://www.apache.org/licenses/LICENSE-2.0
-//
+//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,7 +18,7 @@ import (
"testing"
"time"
- "github.com/cloudwego/abcoder/src/lang/log"
+ "github.com/cloudwego/abcoder/lang/log"
)
func TestCheckRepo(t *testing.T) {
diff --git a/src/lang/rust/rust_test.go b/lang/rust/rust_test.go
similarity index 98%
rename from src/lang/rust/rust_test.go
rename to lang/rust/rust_test.go
index 209bd4a0..b6861187 100644
--- a/src/lang/rust/rust_test.go
+++ b/lang/rust/rust_test.go
@@ -1,11 +1,11 @@
// Copyright 2025 CloudWeGo Authors
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
-//
+//
// https://www.apache.org/licenses/LICENSE-2.0
-//
+//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,7 +22,7 @@ import (
"strings"
"testing"
- lsp "github.com/cloudwego/abcoder/src/lang/lsp"
+ lsp "github.com/cloudwego/abcoder/lang/lsp"
)
func TestRustSpec_NameSpace(t *testing.T) {
diff --git a/src/lang/rust/spec.go b/lang/rust/spec.go
similarity index 99%
rename from src/lang/rust/spec.go
rename to lang/rust/spec.go
index 88a96bd3..f68223f6 100644
--- a/src/lang/rust/spec.go
+++ b/lang/rust/spec.go
@@ -22,8 +22,8 @@ import (
"sort"
"strings"
- lsp "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/utils"
+ lsp "github.com/cloudwego/abcoder/lang/lsp"
+ "github.com/cloudwego/abcoder/lang/utils"
)
var _ lsp.LanguageSpec = (*RustSpec)(nil)
diff --git a/src/lang/rust/utils/lsp.go b/lang/rust/utils/lsp.go
similarity index 98%
rename from src/lang/rust/utils/lsp.go
rename to lang/rust/utils/lsp.go
index df09ee9a..22fd711f 100644
--- a/src/lang/rust/utils/lsp.go
+++ b/lang/rust/utils/lsp.go
@@ -1,11 +1,11 @@
// Copyright 2025 CloudWeGo Authors
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
-//
+//
// https://www.apache.org/licenses/LICENSE-2.0
-//
+//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -21,7 +21,7 @@ import (
"sync"
"time"
- lsp "github.com/cloudwego/abcoder/src/lang/lsp"
+ lsp "github.com/cloudwego/abcoder/lang/lsp"
)
var DefaultLSPInitTime = time.Second * 60
diff --git a/src/lang/rust/utils/lsp_test.go b/lang/rust/utils/lsp_test.go
similarity index 100%
rename from src/lang/rust/utils/lsp_test.go
rename to lang/rust/utils/lsp_test.go
diff --git a/src/lang/uniast/ast.go b/lang/uniast/ast.go
similarity index 92%
rename from src/lang/uniast/ast.go
rename to lang/uniast/ast.go
index b2a0755a..9f93694c 100644
--- a/src/lang/uniast/ast.go
+++ b/lang/uniast/ast.go
@@ -28,9 +28,20 @@ type Language string
const (
Golang Language = "go"
Rust Language = "rust"
- Unknown Language = "unknown"
+ Unknown Language = ""
)
+// String returns the textual name of the language: "rust" for Rust, "go" for
+// Golang, and the raw underlying string for any other value.
+func (l Language) String() string {
+	if l == Rust {
+		return "rust"
+	}
+	if l == Golang {
+		return "go"
+	}
+	return string(l)
+}
+
func NewLanguage(lang string) (l Language) {
// sp := strings.Split(lang, "@")
// if len(sp) > 1 {
@@ -47,11 +58,15 @@ func NewLanguage(lang string) (l Language) {
}
}
+// NodeGraph maps a Node ID (string) => Node
+// Node ID comes from Identity.Full()
+type NodeGraph map[string]*Node
+
// Repository
type Repository struct {
Name string `json:"id"` // module name
- Modules map[string]*Module // module name => Library
- Graph map[string]*Node
+ Modules map[string]*Module // module name => module
+ Graph NodeGraph // node id => node
}
func (r Repository) ID() string {
@@ -78,7 +93,6 @@ func NewRepository(name string) Repository {
}
type File struct {
- Name string
Path string
Imports []Import `json:",omitempty"`
Package *PkgPath `json:",omitempty"`
@@ -109,7 +123,6 @@ func (i Import) Equals(other Import) bool {
func NewFile(path string) *File {
// abs, _ := filepath.Abs(path)
ret := File{
- Name: filepath.Base(path),
Path: path,
}
return &ret
@@ -223,12 +236,16 @@ func ModPathName(mod ModPath) string {
return mod
}
-// Identity holds identity information about a third party declaration
+// Identity is the universally unique identifier of an ast node.
type Identity struct {
- ModPath `json:"ModPath" jsonschema:"description=the compiling module of the ast node, the format is {ModName} or {ModName}@{Version}"` // ModPath is the module which the package belongs to
- PkgPath `json:"PkgPath" jsonschema:"description=the namespace of the ast node"` // Import Path of the third party package
+ // module id, must be unique within a repo
+ ModPath `json:"ModPath" jsonschema:"description=the compiling module of the ast node, the format is {ModName} or {ModName}@{Version}"`
+
+ // path id, must be unique within a module
+ PkgPath `json:"PkgPath" jsonschema:"description=the namespace of the ast node"`
- Name string `json:"Name" jsonschema:"description=unique name of the ast node, the format is one of {FunctionName}, {TypeName}.{MethodName}, {InterfaceName}<{TypeName}>.{MethodName}, {TypeName}"` // Unique Name of declaration (FunctionName, TypeName.MethodName, InterfaceName.MethodName, or TypeName)
+ // symbol id, must be unique within a package
+ Name string `json:"Name" jsonschema:"description=unique name of the ast node, the format is one of {FunctionName}, {TypeName}.{MethodName}, {InterfaceName}<{TypeName}>.{MethodName}, {TypeName}"`
}
func NewIdentity(mod, pkg, name string) Identity {
@@ -239,6 +256,21 @@ func NewIdentity(mod, pkg, name string) Identity {
return Identity{ModPath: mod, PkgPath: pkg, Name: name}
}
+func NewIdentityFromString(str string) (ret Identity) {
+ sp := strings.Split(str, "?")
+ if len(sp) == 2 {
+ ret.ModPath = sp[0]
+ str = sp[1]
+ }
+ sp = strings.Split(str, "#")
+ if len(sp) == 2 {
+ ret.PkgPath = sp[0]
+ str = sp[1]
+ }
+ ret.Name = str
+ return ret
+}
+
// return full packagepath.name
func (i Identity) String() string {
return i.PkgPath + "#" + i.Name
diff --git a/src/lang/uniast/ast_test.go b/lang/uniast/ast_test.go
similarity index 100%
rename from src/lang/uniast/ast_test.go
rename to lang/uniast/ast_test.go
diff --git a/src/lang/uniast/node.go b/lang/uniast/node.go
similarity index 90%
rename from src/lang/uniast/node.go
rename to lang/uniast/node.go
index 9e70b928..1112b9ea 100644
--- a/src/lang/uniast/node.go
+++ b/lang/uniast/node.go
@@ -171,19 +171,28 @@ func (r *Repository) BuildGraph() error {
return nil
}
+// RelationKind
type RelationKind string
const (
+ // DEPENDENCY: the target node is a dependency of the current node
DEPENDENCY RelationKind = "Dependency"
- REFERENCE RelationKind = "Reference"
+ // REFERENCE: the target node is a reference of the current node
+ REFERENCE RelationKind = "Reference"
)
+// Relation between two nodes
type Relation struct {
- Kind RelationKind
- Identity // target node
- Line int
- Desc *string `json:",omitempty"`
- Codes *string `json:",omitempty"`
+ // Kind of the relation
+ Kind RelationKind
+ // target node
+ Identity
+ // start line offset of the target token, relative to the current node's codes
+ Line int
+ // information about this relation
+ Desc *string `json:",omitempty"`
+ // related codes representing this relation, coming from the current node's codes
+ Codes *string `json:",omitempty"`
}
// type marshalerRelation struct {
@@ -201,13 +210,16 @@ type Relation struct {
// return json.Marshal(rr)
// }
-// Node 类型
+// Node Type
type NodeType int
const (
UNKNOWN NodeType = iota
+ // top-level functions and methods
FUNC
+ // Struct, TypeAlias, Enum...
TYPE
+ // Global Variable or Global Const
VAR
)
@@ -251,12 +263,18 @@ func NewNodeType(typ string) NodeType {
}
}
+// an Entity in a language
type Node struct {
+ // unique identity of the node
Identity
- Type NodeType
+ // Node Type, must be one of FUNC, TYPE, VAR
+ Type NodeType
+ // other nodes that this node depends on
Dependencies []Relation
- References []Relation
- Repo *Repository `json:"-"`
+ // other nodes that reference this node
+ References []Relation
+ // the repo that this node belongs to
+ Repo *Repository `json:"-"`
}
func (n Node) GetDependency(id Identity) *Relation {
diff --git a/src/lang/uniast/parser.go b/lang/uniast/parser.go
similarity index 100%
rename from src/lang/uniast/parser.go
rename to lang/uniast/parser.go
diff --git a/src/lang/uniast/utils.go b/lang/uniast/utils.go
similarity index 100%
rename from src/lang/uniast/utils.go
rename to lang/uniast/utils.go
diff --git a/src/lang/uniast/writer.go b/lang/uniast/writer.go
similarity index 100%
rename from src/lang/uniast/writer.go
rename to lang/uniast/writer.go
diff --git a/src/lang/utils/err.go b/lang/utils/err.go
similarity index 100%
rename from src/lang/utils/err.go
rename to lang/utils/err.go
diff --git a/src/lang/utils/files.go b/lang/utils/files.go
similarity index 100%
rename from src/lang/utils/files.go
rename to lang/utils/files.go
diff --git a/src/lang/utils/strings.go b/lang/utils/strings.go
similarity index 100%
rename from src/lang/utils/strings.go
rename to lang/utils/strings.go
diff --git a/lang/write.go b/lang/write.go
new file mode 100644
index 00000000..a1eb281b
--- /dev/null
+++ b/lang/write.go
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2025 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package lang
+
+import (
+ "context"
+ "fmt"
+
+ "github.com/cloudwego/abcoder/lang/golang/writer"
+ "github.com/cloudwego/abcoder/lang/uniast"
+)
+
+// WriteOptions configures Write.
+type WriteOptions struct {
+ // OutputDir is the output directory.
+ OutputDir string
+ // Compiler path
+ Compiler string
+}
+
+// Write writes the AST to the output directory.
+func Write(ctx context.Context, repo *uniast.Repository, args WriteOptions) error {
+ for mpath, m := range repo.Modules {
+ if m.IsExternal() {
+ continue
+ }
+ var w uniast.Writer
+ switch m.Language {
+ case uniast.Golang:
+ w = writer.NewWriter(writer.Options{CompilerPath: args.Compiler})
+ default:
+ return fmt.Errorf("unsupported language: %s", m.Language)
+ }
+ if err := w.WriteModule(repo, mpath, args.OutputDir); err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/main.go b/main.go
new file mode 100644
index 00000000..53e026f0
--- /dev/null
+++ b/main.go
@@ -0,0 +1,156 @@
+// Copyright 2025 CloudWeGo Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * Copyright 2024 ByteDance Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import (
+ "context"
+ "flag"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/cloudwego/abcoder/lang"
+ "github.com/cloudwego/abcoder/lang/log"
+ "github.com/cloudwego/abcoder/lang/uniast"
+ "github.com/cloudwego/abcoder/lang/utils"
+)
+
+const Usage = `abcoder <Action> <Language> <URI> [Flags]
+Action:
+ parse parse the whole repo and output UniAST
+ write write the UniAST to the output directory
+Language:
+ rust for rust codes
+ go for golang codes
+URI:
+ for action parse: the directory path of the repo
+ for action write: the file path of the UniAST for writer
+`
+
+// main dispatches `abcoder <Action> <Language> <URI> [Flags]`: "parse" prints or
+// saves the UniAST of the repo at URI, "write" emits code from a UniAST file.
+// Any failure, including an unknown action, exits with status 1.
+func main() {
+	flags := flag.NewFlagSet("abcoder", flag.ExitOnError)
+	flags.Usage = func() {
+		fmt.Fprint(os.Stderr, Usage)
+		fmt.Fprintf(os.Stderr, "Flags:\n")
+		flags.PrintDefaults()
+	}
+
+	if len(os.Args) < 4 {
+		fmt.Fprint(os.Stderr, Usage)
+		os.Exit(1)
+	}
+
+	action := strings.ToLower(os.Args[1])
+	language := uniast.NewLanguage(os.Args[2])
+	if language == uniast.Unknown {
+		fmt.Fprintf(os.Stderr, "unsupported language: %s\n", os.Args[2])
+		os.Exit(1)
+	}
+	uri := os.Args[3]
+
+	// Flags shared by every action; action-specific ones are registered below.
+	flagVerbose := flags.Bool("verbose", false, "Verbose mode.")
+	flagOutput := flags.String("o", "", "Output path.")
+
+	switch action {
+	case "parse":
+		var opts lang.ParseOptions
+		flags.BoolVar(&opts.LoadExternalSymbol, "load-external-symbol", false, "load external symbols into results")
+		flags.BoolVar(&opts.NoNeedComment, "no-need-comment", false, "do not need comment (only works for Go now)")
+		flags.BoolVar(&opts.NeedTest, "need-test", false, "need parse test files (only works for Go now)")
+		flags.Var((*StringArray)(&opts.Excludes), "exclude", "exclude files or directories, support multiple values")
+		flagLsp := flags.String("lsp", "", "Specify the language server path.")
+
+		flags.Parse(os.Args[4:])
+		if *flagVerbose {
+			log.SetLogLevel(log.DebugLevel)
+			opts.Verbose = true
+		}
+		opts.Language = language
+		opts.LSP = *flagLsp
+
+		out, err := lang.Parse(context.Background(), uri, opts)
+		if err != nil {
+			log.Error("Failed to parse: %v\n", err)
+			os.Exit(1)
+		}
+
+		if *flagOutput == "" {
+			fmt.Fprintf(os.Stdout, "%s\n", out)
+		} else if err := utils.MustWriteFile(*flagOutput, out); err != nil {
+			log.Error("Failed to write output: %v\n", err)
+			os.Exit(1)
+		}
+	case "write":
+		repo, err := uniast.LoadRepo(uri)
+		if err != nil {
+			log.Error("Failed to load repo: %v\n", err)
+			os.Exit(1)
+		}
+
+		var opts lang.WriteOptions
+		flags.StringVar(&opts.Compiler, "compiler", "", "destination compiler path.")
+		flags.Parse(os.Args[4:])
+		if *flagVerbose {
+			log.SetLogLevel(log.DebugLevel)
+		}
+
+		// Without -o, write into a directory named after the repo.
+		opts.OutputDir = *flagOutput
+		if opts.OutputDir == "" {
+			opts.OutputDir = filepath.Base(repo.Name)
+		}
+		if err := lang.Write(context.Background(), repo, opts); err != nil {
+			log.Error("Failed to write: %v\n", err)
+			os.Exit(1)
+		}
+	default:
+		// Reject unknown actions instead of silently exiting with status 0.
+		fmt.Fprintf(os.Stderr, "unsupported action: %s\n", os.Args[1])
+		fmt.Fprint(os.Stderr, Usage)
+		os.Exit(1)
+	}
+}
+
+type StringArray []string
+
+func (s *StringArray) Set(value string) error {
+ *s = append(*s, value)
+ return nil
+}
+
+func (s *StringArray) String() string {
+ return strings.Join(*s, ",")
+}
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
deleted file mode 100644
index 292fe499..00000000
--- a/rust-toolchain.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[toolchain]
-channel = "stable"
diff --git a/script/check_all_linenos.sh b/script/check_all_linenos.sh
index 3f8c617a..5d397943 100755
--- a/script/check_all_linenos.sh
+++ b/script/check_all_linenos.sh
@@ -15,23 +15,18 @@
root=$(dirname $(realpath $(dirname $0)))
cd $root
-echo "[Making parser]"
-./script/make_parser.sh
-echo "[Done making parser]"
-parser=tools/parser/lang
mkdir -p testdata/jsons
do_test() {
+ name="ast"
lang=$1
srcpath=$2
- name=$3
flags=$4
- echo $name...
- $parser -d -v --no-need-comment collect $lang $srcpath > testdata/jsons/$name.json 2>testdata/jsons/$name.log
- cat testdata/jsons/$name.log
- python script/check_lineno.py --json testdata/jsons/$name.json --base $srcpath $flags > testdata/jsons/$name.check
+ echo "go run . parse $lang $srcpath -verbose --no-need-comment > testdata/jsons/$name.json"
+ go run . parse $lang $srcpath -verbose --no-need-comment > testdata/jsons/$name.json
+ python3 script/check_lineno.py --json testdata/jsons/$name.json --base $srcpath $flags > testdata/jsons/$name.check
if grep -q "All functions verified successfully!" testdata/jsons/$name.check; then
echo " [PASS]"
@@ -40,5 +35,5 @@ do_test() {
exit 1
fi
}
-do_test go src/lang go "--zero_linebase"
-do_test rust testdata/rust2-wobyted rust2 "--zero_linebase --implheads"
+do_test go testdata/golang "--zero_linebase"
+do_test rust testdata/rust2 "--zero_linebase --implheads"
diff --git a/script/make_parser.sh b/script/make_parser.sh
deleted file mode 100755
index af107282..00000000
--- a/script/make_parser.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-# Copyright 2025 CloudWeGo Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-root=$(dirname $(realpath $(dirname $0)))
-
-# make lang
-cd $root/src/lang
-go build -o ../../tools/parser/lang .
diff --git a/src/bin/cmd.rs b/src/bin/cmd.rs
deleted file mode 100644
index 15931e90..00000000
--- a/src/bin/cmd.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::{env, panic, process, thread, time::Duration};
-
-use ABCoder::{
- compress::compress::compress_all,
- export::{self, ExportOptions},
- parse::{self, CompressOptions},
-};
-
-#[derive(Clone, Debug)]
-struct Options {
- repo_path: String,
- action: Action,
-}
-
-#[derive(Clone, Debug)]
-enum Action {
- Compress(CompressOptions),
- Export(ExportOptions),
-}
-
-fn main() {
- // parse Options from cmd args
- let options = parse_options();
- println!("arguments: {:?}", options);
-
- match options.action {
- Action::Compress(cmp) => {
- if cmp.force_update_ast {
- merge_repo(&options.repo_path, &cmp);
- }
- compress(&options.repo_path, &cmp);
- }
- Action::Export(exp) => {
- export(&options.repo_path, &exp);
- }
- }
-}
-
-const USAGE: &str = "Usage: ABCoder [Flags]
-RepoPath: the path of the repo to compress. Can be a local path or a git url.
-Actions: compress|export
-compress: compress the repo. Including flags:
- --parse-only: only parse the repo, not compress it
- --export-compress: export the compress result
- --force-update-ast: force parsing repo and merge the previous result
- --not-load-external-symbol: not load external external symbols to speed up parsing
- --no-need-comment: not need comment in symbol content (only works for Go now)
-export: export the result to csv or markdown (default). Including flags:
- --csv: export the compress result to csv
- --out-dir : output directory path, default is $WORK_DIR
- --public-only: only export the public symbols
-";
-
-fn parse_options() -> Options {
- let args: Vec = env::args().collect();
- if args.len() < 3 {
- println!("{}", USAGE);
- process::exit(1);
- }
-
- let action = match args[1].as_str() {
- "compress" => {
- let mut compress_action = CompressOptions::default();
- if args.len() > 3 {
- for i in 3..args.len() {
- match args[i].as_str() {
- "--force-update-ast" => {
- compress_action.force_update_ast = true;
- }
- "--not-load-external-symbol" => {
- compress_action.not_load_external_symbol = true;
- }
- "--no-need-comment" => {
- compress_action.no_need_comment = true;
- }
- _ => {}
- }
- }
- }
- Action::Compress(compress_action)
- }
-
- "export" => {
- let mut opts = ExportOptions::default();
- if args.len() > 3 {
- for i in 3..args.len() {
- match args[i].as_str() {
- "--out-dir" => {
- if args.len() <= i + 1 {
- println!("--out-dir must specify a value");
- process::exit(1);
- }
- opts.output = Some(args[i + 1].clone());
- }
- "--csv" => {
- opts.csv = true;
- }
- "--public-only" => {
- opts.public_only = true;
- }
- _ => {}
- }
- }
- }
- Action::Export(opts)
- }
-
- _ => {
- println!("{}", USAGE);
- process::exit(1);
- }
- };
-
- Options {
- repo_path: args[2].clone(),
- action,
- }
-}
-
-fn compress(repo_path: &String, cmp: &CompressOptions) {
- // recoverable logic
- let run = || {
- // get the repo
- let repo = parse::get_repo(repo_path, &cmp);
- if let Err(err) = repo {
- println!("get repo error: {:?}", err);
- process::exit(1);
- }
-
- let mut repo = repo.unwrap();
- repo.save_to_cache();
- println!("successfully parsed repo: {}", repo.id);
- if cmp.parse_only {
- return;
- }
-
- // compress the repo
- println!("compressing repo: {}", repo.id);
- // block on compress
- let rt = tokio::runtime::Runtime::new().unwrap();
- rt.block_on(compress_all(&mut repo));
-
- // save the compressed repo
- repo.save_to_cache();
-
- println!("successfully compressed repo: {}", repo.id);
- };
-
- loop {
- let result = panic::catch_unwind(run);
- if let Err(err) = result {
- println!("panic: {:?}", err);
- // sleep 60s and retry
- thread::sleep(Duration::from_secs(60));
- continue;
- } else {
- process::exit(0);
- }
- }
-}
-
-fn export(repo_path: &String, cmp: &ExportOptions) {
- // get the repo
- let repo = parse::get_repo(repo_path, &CompressOptions::default());
- if let Err(err) = repo {
- println!("get repo error: {:?}", err);
-
- process::exit(1);
- }
-
- let mut repo = repo.unwrap();
-
- // export the compress
- println!("export repo: {}", repo.id);
- export::export_repo(&mut repo, cmp);
-
- println!("successfully exported repo: {}", repo.id);
-}
-
-fn merge_repo(repo_path: &String, cmp: &CompressOptions) {
- // get old repo
- let repo = parse::get_repo(repo_path, &cmp);
- if let Err(err) = repo {
- println!("get repo error: {:?}", err);
- process::exit(1);
- }
- let mut repo = repo.unwrap();
-
- // parse new repo
- let nrepo = parse::force_parse_repo(repo_path, &cmp);
- if let Err(err) = nrepo {
- println!("parse repo error: {:?}", err);
- process::exit(1);
- }
- let nrepo = nrepo.unwrap();
-
- repo.merge_with(&nrepo);
- repo.save_to_cache();
-
- println!("successfully merge repo: {}", repo.id);
-}
diff --git a/src/compress/compress.rs b/src/compress/compress.rs
deleted file mode 100644
index e1fdb67b..00000000
--- a/src/compress/compress.rs
+++ /dev/null
@@ -1,1000 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::HashMap;
-use std::error::Error;
-use std::ops::Add;
-
-use async_recursion::async_recursion;
-// Add these imports at the beginning of your file
-
-use types::types::{
- CalledType, Identity, KeyValueType, Repository, ToCompressFunc, ToCompressType,
-};
-
-use crate::compress::llm::compress;
-use crate::compress::llm::ToCompress;
-
-use crate::compress::types;
-use crate::config::CONFIG;
-use crate::storage::cache::get_cache;
-use crate::storage::cache::load_repo;
-
-use super::types::types::Module;
-use super::types::types::ToCompressVar;
-use super::types::types::Variant;
-
-pub fn from_json(id: &str, json: &str) -> Result> {
- let mut f: Repository = serde_json::from_str(json)?;
- if f.id == "" {
- f.id = id.to_string();
- }
- if f.graph.is_none() {
- //return err
- return Err(Box::new(std::io::Error::new(
- std::io::ErrorKind::Other,
- "graph is None",
- )));
- }
- f.save_to_cache();
- Ok(f)
-}
-
-pub fn is_externa_mod(modpath: &str) -> bool {
- return modpath == "" || modpath.contains("@");
-}
-
-pub async fn compress_all(repo: &mut Repository) {
- let mut to_compress_func = Vec::new();
- let mut to_compress_type = Vec::new();
- let mut to_compress_var = Vec::new();
-
- for (mpath, _mod) in &repo.modules {
- if is_externa_mod(mpath) {
- // NOTICE: empty dir means it's a external module, which is only used for lookup symbols
- continue;
- }
- for (_, pkg) in &_mod.packages {
- for (_, func) in &pkg.functions {
- let id = func.id();
- to_compress_func.push(id)
- }
-
- for (_, _type) in &pkg.types {
- let id = _type.id();
- to_compress_type.push(id)
- }
-
- for (_, var) in &pkg.vars {
- let id = var.id();
- to_compress_var.push(id)
- }
- }
- }
-
- for id in to_compress_var {
- let mut m: HashMap = HashMap::new();
- cascade_compress_variable(&id, repo, &mut m).await;
- }
-
- for id in to_compress_func {
- let mut m: HashMap = HashMap::new();
- cascade_compress_function(&id, repo, &mut m).await;
- }
-
- for id in to_compress_type {
- let mut m: HashMap = HashMap::new();
- cascade_compress_struct(&id, repo, &mut m).await;
- }
-
- for (mpath, _mod) in repo.clone().modules {
- if is_externa_mod(&mpath) {
- // NOTICE: empty dir means it's a external module, which is only used for lookup symbols
- continue;
- }
- for (id, pkg) in &_mod.packages {
- if pkg.compress_data.is_none() {
- compress_package(&id, &mpath, repo).await;
- }
- }
- compress_module(&mpath, repo).await;
- repo.save_to_cache();
- }
-}
-
-pub async fn compress_module(modpath: &str, repo: &mut Repository) {
- let module = repo.modules.get_mut(modpath).unwrap();
- let compress_data = module.to_compress();
- let compress_data =
- llm_compress_module(serde_json::to_string(&compress_data).unwrap().as_str()).await;
- if compress_data.is_none() {
- return;
- }
- let compress_data = compress_data.unwrap();
- module.compress_data = Some(compress_data);
- println!("finish to compress module: {}", module.name);
-}
-
-pub async fn compress_package(id: &str, module: &str, repo: &mut Repository) {
- let pkg = repo
- .modules
- .get_mut(module)
- .unwrap()
- .packages
- .get_mut(id)
- .unwrap();
- let compress_data = pkg.to_compress();
- let compress_data =
- llm_compress_package(serde_json::to_string(&compress_data).unwrap().as_str()).await;
- if compress_data.is_none() {
- return;
- }
- let compress_data = compress_data.unwrap();
- pkg.compress_data = Some(compress_data);
- repo.save_to_cache();
- println!("finish to compress package: {}", id);
-}
-
-pub fn should_compress(id: &Identity, repo: &Repository) -> bool {
- if !id.inside() || id.pkg_path.contains("kitex_gen/") || id.pkg_path.contains("hertz_gen/") {
- return false;
- } else {
- let fi = repo.get_file_line(id);
- for exclude in &CONFIG.exclude_dirs {
- // check prefix
- if fi.file.starts_with(exclude) {
- return false;
- }
- }
- return true;
- }
-}
-
-const MAX_REFERS: usize = 4;
-
-pub async fn cascade_compress_variable(
- id: &Identity,
- repo: &mut Repository,
- m: &mut HashMap,
-) {
- if !should_compress(id, &repo) {
- return;
- }
- println!("start to comprees:{:?}", id);
- let mut refs = Vec::new();
- let var_opt = {
- let v = repo.get_var(id);
- if v.is_none() {
- eprintln!("not found var, id {:?}", id);
- return;
- }
- v.unwrap().clone()
- };
-
- if let Some(d) = &var_opt.compress_data {
- if d != "" {
- return;
- }
- }
-
- let graph = repo.graph.as_ref().unwrap();
- let var_node = graph.get(&String::from(id));
- if var_node.is_none() {
- eprintln!("var node not found, id {:?}", id);
- return;
- }
- let var_node = var_node.unwrap();
- if let Some(nfs) = &var_node.references {
- for (i, v) in nfs.iter().enumerate() {
- if i >= MAX_REFERS {
- eprintln!("too many references for {:?}", id);
- break;
- }
- let c = repo.get_id_content(&v.id());
- if c.is_none() {
- eprintln!("{:?} node is not found", v);
- continue;
- }
- let elem = c.unwrap();
- refs.push(elem);
- }
- }
-
- // compress type if any
- let ty: Option = if let Some(id) = &var_opt.type_id {
- cascade_compress_struct(id, repo, m).await;
- let tt = repo.get_type(id).clone();
- if tt.is_none() {
- eprintln!("not found ty {:?}", id);
- None
- } else {
- tt.unwrap().compress_data.clone()
- }
- } else {
- None
- };
-
- if let Some(c) = llm_compress_var(&var_opt.content, refs, &ty).await {
- if c != "" {
- let var = repo
- .modules
- .get_mut(&id.mod_path)
- .unwrap()
- .packages
- .get_mut(id.pkg_path.as_str())
- .unwrap()
- .vars
- .get_mut(id.name.as_str())
- .unwrap();
- let content = c.trim().to_string();
- var.compress_data = Some(content);
- repo.save_to_cache();
- return;
- }
- }
- panic!("empty compress for {:?}", id)
-}
-
-#[async_recursion]
-pub async fn cascade_compress_function(
- id: &Identity,
- repo: &mut Repository,
- m: &mut HashMap,
-) {
- if !should_compress(id, repo) {
- return;
- }
- println!("start to comprees:{:?}", id);
- let mut to_compress_func = Vec::new();
- let mut to_compress_type = Vec::new();
- let mut to_compress_var = Vec::new();
-
- {
- let func_opt = repo.get_func(id);
- if func_opt.is_none() {
- eprintln!("not found function, id {:?}", id);
- return;
- }
- let func_opt = func_opt.unwrap();
-
- // Already compress path
- if let Some(d) = &func_opt.compress_data {
- if d != "" {
- return;
- }
- }
-
- // vars
- if let Some(vars) = &func_opt.global_vars {
- for (f) in vars {
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a calling cycle: {}", compress_key);
- continue;
- }
- let id = f.clone();
- m.insert(compress_key, true);
- to_compress_var.push(id);
- }
- }
-
- // Start to compress internal function callls
- if let Some(calls) = &func_opt.function_calls {
- for f in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive function: {}", f.name);
- continue;
- }
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a calling cycle: {}", compress_key);
- continue;
- }
- let id = f.clone();
- m.insert(compress_key, true);
- to_compress_func.push(id);
- }
- }
-
- // Start to compress internal method_calls
- if let Some(calls) = &func_opt.method_calls {
- for f in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive method: {}", f.name);
- continue;
- }
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a calling cycle: {}", compress_key);
- continue;
- }
- let id = f.clone();
- m.insert(compress_key, true);
- to_compress_func.push(id);
- }
- }
-
- // params
- if let Some(calls) = &func_opt.params {
- for f in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive function: {}", f.name);
- continue;
- }
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a calling cycle: {}", compress_key);
- continue;
- }
- let id = f.clone();
- m.insert(compress_key, true);
- to_compress_type.push(id);
- }
- }
-
- // rets
- if let Some(calls) = &func_opt.results {
- for f in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive function: {}", f.name);
- continue;
- }
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a calling cycle: {}", compress_key);
- continue;
- }
- let id = f.clone();
- m.insert(compress_key, true);
- to_compress_type.push(id);
- }
- }
-
- // types
- if let Some(calls) = &func_opt.types {
- for f in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive function: {}", f.name);
- continue;
- }
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a calling cycle: {}", compress_key);
- continue;
- }
- let id = f.clone();
- m.insert(compress_key, true);
- to_compress_type.push(id);
- }
- }
-
- // receiver
- if let Some(f) = &func_opt.receiver {
- let compress_key = String::from(&f.type_id);
- if !m.get(&compress_key).is_some() {
- let id = f.type_id.clone();
- m.insert(compress_key, true);
- to_compress_type.push(id);
- }
- }
- }
-
- // Recursive call
- for f_id in to_compress_var {
- cascade_compress_variable(&f_id, repo, m).await;
- m.remove(&f_id.to_string());
- }
-
- for f_id in to_compress_func {
- cascade_compress_function(&f_id, repo, m).await;
- m.remove(&f_id.to_string());
- }
-
- for t_id in to_compress_type {
- cascade_compress_struct(&t_id, repo, m).await;
- m.remove(&t_id.to_string());
- }
-
- let mut func_map = HashMap::new();
- let mut type_map = HashMap::new();
- let mut var_map = HashMap::new();
- let mut inputs_map = HashMap::new();
- let mut outputs_map = HashMap::new();
- let mut receiver: Option = None;
-
- let content = {
- let func_opt = repo.get_func(id).unwrap();
-
- // func body vars
- if let Some(vars) = &func_opt.global_vars {
- for (f) in vars {
- let var = repo.get_var(f);
- if var.is_none() {
- eprintln!("not found var, id {:?}", id);
- continue;
- }
- let var = var.unwrap();
- if var.compress_data.is_none() || var.compress_data.as_ref().unwrap() == "" {
- eprintln!("var {}.{} is not compressed!!!", var.pkg_path, var.name);
- var_map.insert(f.name.clone(), var.content.clone());
- } else {
- var_map.insert(f.name.clone(), var.compress_data.clone().unwrap());
- }
- }
- }
-
- // func body types
- if let Some(types) = &func_opt.types {
- for (f) in types {
- let sub_type = repo.get_type(f);
- if sub_type.is_none() {
- eprintln!("not found type, id {:?}", id);
- continue;
- }
- let sub_type = sub_type.unwrap();
- if sub_type.compress_data.is_none()
- || sub_type.compress_data.as_ref().unwrap() == ""
- {
- eprintln!(
- "sub type {}.{} is not compressed!!!",
- sub_type.pkg_path, sub_type.name
- );
- type_map.insert(sub_type.name.clone(), sub_type.content.clone());
- } else {
- type_map.insert(
- sub_type.name.clone(),
- sub_type.compress_data.clone().unwrap(),
- );
- }
- }
- }
-
- // params
- if let Some(types) = &func_opt.params {
- for (f) in types {
- let sub_type = repo.get_type(f);
- if sub_type.is_none() {
- eprintln!("not found type, id {:?}", id);
- continue;
- }
- let sub_type = sub_type.unwrap();
- if sub_type.compress_data.is_none()
- || sub_type.compress_data.as_ref().unwrap() == ""
- {
- eprintln!(
- "sub type {}.{} is not compressed!!!",
- sub_type.pkg_path, sub_type.name
- );
- inputs_map.insert(sub_type.name.clone(), sub_type.content.clone());
- } else {
- inputs_map.insert(
- sub_type.name.clone(),
- sub_type.compress_data.clone().unwrap(),
- );
- }
- }
- }
-
- // results
- if let Some(types) = &func_opt.results {
- for (f) in types {
- let sub_type = repo.get_type(f);
- if sub_type.is_none() {
- eprintln!("not found type, id {:?}", id);
- continue;
- }
- let sub_type = sub_type.unwrap();
- if sub_type.compress_data.is_none()
- || sub_type.compress_data.as_ref().unwrap() == ""
- {
- eprintln!(
- "sub type {}.{} is not compressed!!!",
- sub_type.pkg_path, sub_type.name
- );
- outputs_map.insert(sub_type.name.clone(), sub_type.content.clone());
- } else {
- outputs_map.insert(
- sub_type.name.clone(),
- sub_type.compress_data.clone().unwrap(),
- );
- }
- }
- }
-
- // receiver
- if let Some(f) = &func_opt.receiver {
- let sub_type = repo.get_type(&f.type_id);
- if sub_type.is_none() {
- eprintln!("not found type, id {:?}", f.type_id);
- } else {
- let sub_type = sub_type.unwrap();
- if sub_type.compress_data.is_none()
- || sub_type.compress_data.as_ref().unwrap() == ""
- {
- eprintln!(
- "sub type {}.{} is not compressed!!!",
- sub_type.pkg_path, sub_type.name
- );
- receiver = Some(sub_type.content.clone());
- } else {
- receiver = Some(sub_type.compress_data.clone().unwrap());
- }
- }
- }
-
- // Add the compress data of internal function calls
- if let Some(calls) = &func_opt.function_calls {
- for (f) in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive function: {}", f.name);
- continue;
- }
-
- let sub_function = repo.get_func(f);
- if sub_function.is_none() {
- eprintln!("not found function, id {:?}", id);
- continue;
- }
- let sub_function = sub_function.unwrap();
- if sub_function.compress_data.is_none()
- || sub_function.compress_data.as_ref().unwrap() == ""
- {
- eprintln!(
- "sub function {}.{} is not compressed!!!",
- sub_function.pkg_path, sub_function.name
- );
- func_map.insert(sub_function.name.clone(), sub_function.content.clone());
- } else {
- func_map.insert(
- sub_function.name.clone(),
- sub_function.compress_data.clone().unwrap(),
- );
- }
- }
- }
-
- // Add the compress data of internal method calls
- if let Some(calls) = &func_opt.method_calls {
- for (f) in calls {
- if f.name == id.name && f.pkg_path == id.pkg_path {
- eprintln!("find a recursive method: {}", f.name);
- continue;
- }
-
- let sub_function = repo.get_func(id);
- if sub_function.is_none() {
- eprintln!("not found function, id {:?}", id);
- continue;
- }
- let sub_function = sub_function.unwrap();
- if sub_function.compress_data.is_none()
- || sub_function.compress_data.as_ref().unwrap() == ""
- {
- eprintln!(
- "sub function {}.{} is not compressed!!!",
- sub_function.pkg_path, sub_function.name
- );
- func_map.insert(sub_function.name.clone(), sub_function.content.clone());
- } else {
- func_map.insert(
- sub_function.name.clone(),
- sub_function.compress_data.clone().unwrap(),
- );
- }
- }
- }
-
- if func_opt.content.is_empty() {
- eprintln!("content is empty skip it");
- Some("".to_string())
- } else {
- llm_compress_func(
- func_opt.content.as_str(),
- func_map,
- type_map,
- var_map,
- inputs_map,
- outputs_map,
- receiver,
- )
- .await
- }
- };
-
- let func_opt = repo
- .modules
- .get_mut(&id.mod_path)
- .unwrap()
- .packages
- .get_mut(id.pkg_path.as_str())
- .unwrap()
- .functions
- .get_mut(id.name.as_str())
- .unwrap();
- if let Some(c) = content {
- if c != "" {
- let content = c.trim().to_string();
- func_opt.compress_data = Some(content);
- repo.save_to_cache();
- return;
- }
- }
- panic!("empty compress for {:?}", id)
-}
-
-#[async_recursion]
-pub async fn cascade_compress_struct(
- id: &Identity,
- repo: &mut Repository,
- m: &mut HashMap,
-) {
- if !should_compress(id, &repo) {
- return;
- }
- println!("start to comprees:{:?}", id);
- let mut to_compress = Vec::new();
- let mut to_compress_func = Vec::new();
- {
- let md = repo.modules.get(&id.mod_path);
- if md.is_none() {
- eprintln!("not found module, id {:?}", id);
- return;
- }
- let p = md.unwrap().packages.get(id.pkg_path.as_str());
- if p.is_none() {
- eprintln!("not found package, id {:?}", id);
- return;
- }
- let struct_opt = p.unwrap().types.get(id.name.as_str());
- if struct_opt.is_none() {
- eprintln!("not found struct, id {:?}", id);
- return;
- }
- let stru = struct_opt.unwrap();
-
- // Already compress path
- if let Some(d) = &stru.compress_data {
- if d != "" {
- return;
- }
- }
-
- // Start to compress sub struct
- if let Some(sub) = &stru.sub_struct {
- for f in sub {
- // TODO: compress extrenal symbol too
- if !repo.contains(f) {
- continue;
- }
-
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a struct embeding cycle: {}", compress_key);
- continue;
- }
-
- let id = f.clone();
- to_compress.push(id);
- m.insert(compress_key, true);
- }
- }
-
- // Start to compress inline struct
- if let Some(inline) = &stru.inline_struct {
- for f in inline {
- // TODO: compress extrenal symbol too
- if !repo.contains(f) {
- continue;
- }
-
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a struct embeding cycle: {}", compress_key);
- continue;
- }
-
- let id = f.clone();
- to_compress.push(id);
- m.insert(compress_key, true);
- }
- }
-
- // Start to compress related methods
- if let Some(methods) = &stru.methods {
- for (_, f) in methods {
- if !repo.contains(f) {
- continue;
- }
-
- let compress_key = String::from(f);
- if m.get(&compress_key).is_some() {
- eprintln!("find a struct embeding cycle: {}", compress_key);
- continue;
- }
-
- let id = f.clone();
- to_compress_func.push(id);
- m.insert(compress_key, true);
- }
- }
- }
-
- // Recursive call
- for f_id in to_compress {
- cascade_compress_struct(&f_id, repo, m).await;
- m.remove(&f_id.to_string());
- }
- for f_id in to_compress_func {
- cascade_compress_function(&f_id, repo, m).await;
- m.remove(&f_id.to_string());
- }
-
- // Recursive compressing has done, start to compress myself.
- let mut type_map = HashMap::new();
- let mut method_map = HashMap::new();
- let content = {
- let _type = repo.get_type(id);
- if _type.is_none() {
- eprintln!("not found type, id {:?}", id);
- return;
- }
- let _type = _type.unwrap();
-
- // Add the compress data of sub struct
- if let Some(subs) = &_type.sub_struct {
- for (f) in subs {
- let sub = repo.get_type(f);
- if sub.is_none() {
- eprintln!("do not get tye type in the pkg: {:?}", f);
- continue;
- }
- let compress_data = sub.unwrap().compress_data.clone();
- if compress_data.is_none() || compress_data.as_ref().unwrap() == "" {
- type_map.insert(f.name.clone(), sub.unwrap().content.clone());
- } else {
- type_map.insert(f.name.clone(), compress_data.unwrap());
- }
- }
- }
-
- // Add the compress data of inline struct
- if let Some(inlines) = &_type.inline_struct {
- for (f) in inlines {
- let inline = repo.get_type(f);
- if inline.is_none() {
- eprintln!("do not get tye type in the pkg: {:?}", f);
- continue;
- }
- let compress_data = inline.unwrap().compress_data.clone();
- if compress_data.is_none() || compress_data.as_ref().unwrap() == "" {
- eprint!("do not get the compress data of the inline struct: {:?}", f);
- type_map.insert(f.name.clone(), inline.unwrap().content.clone());
- } else {
- type_map.insert(f.name.clone(), compress_data.unwrap());
- }
- }
- }
-
- // Add the compress data of related methods(We have done function and methods compress before, so don't worry about it.)
- if let Some(methods) = &_type.methods {
- for (k, f) in methods {
- if !repo.contains(f) {
- continue;
- }
- let func = repo.get_func(f);
- if func.is_none() || func.as_ref().unwrap().compress_data.is_none() {
- eprintln!("do not get the method of the type, id: {:?}", f);
- method_map.insert(k.clone(), func.unwrap().content.clone());
- } else {
- method_map.insert(k.clone(), func.unwrap().compress_data.clone().unwrap());
- }
- }
- }
-
- if _type.content.is_empty() {
- eprintln!("content is empty skip it");
- Some("".to_string())
- } else {
- llm_compress_type(_type.content.as_str(), type_map, method_map).await
- }
- };
-
- let mut type_opt = repo
- .modules
- .get_mut(&id.mod_path)
- .unwrap()
- .packages
- .get_mut(id.pkg_path.as_str())
- .unwrap()
- .types
- .get_mut(id.name.as_str())
- .unwrap();
- if let Some(c) = content {
- if c != "" {
- let content = c.trim().to_string();
- type_opt.compress_data = Some(content);
- repo.save_to_cache();
- return;
- }
- }
- // panic!("empty compress for {:?}", id)
-}
-
-async fn llm_compress_module(m: &str) -> Option {
- let compressing = ToCompress::ToCompressModule(m.to_string());
- let compress_data = compress(&compressing).await;
- Option::from(compress_data)
-}
-
-async fn llm_compress_package(pkg: &str) -> Option {
- let compress_pkg = ToCompress::ToCompressPkg(pkg.to_string());
- let compress_data = compress(&compress_pkg).await;
- Option::from(compress_data)
-}
-
-async fn llm_compress_var(var: &String, refs: Vec, ty: &Option) -> Option {
- let compress_var = ToCompressVar {
- content: var,
- r#type: ty,
- refers: refs,
- };
- let to_compress_str = serde_json::to_string(&compress_var).unwrap();
- let compress_enum = ToCompress::ToCompressVar(to_compress_str);
- let compress_data = compress(&compress_enum).await;
- Some(compress_data)
-}
-
-async fn llm_compress_func(
- func: &str,
- funcs: HashMap,
- types: HashMap,
- vars: HashMap,
- inputs: HashMap,
- outputs: HashMap,
- receiver: Option,
-) -> Option {
- let mut compress_func = ToCompressFunc {
- content: func.to_string(),
- related_func: None,
- related_type: None,
- related_var: None,
- receiver: None,
- params: None,
- results: None,
- };
- if !funcs.is_empty() {
- let mut related_func = Vec::new();
- for (name, compressed_data) in funcs {
- let re = CalledType {
- call_name: name,
- description: compressed_data,
- };
- related_func.push(re);
- }
- compress_func.related_func = Some(related_func);
- }
- if !types.is_empty() {
- let mut related_type = Vec::new();
- for (name, compressed_data) in types {
- let re = KeyValueType {
- name: name,
- description: compressed_data,
- };
- related_type.push(re);
- }
- compress_func.related_type = Some(related_type);
- }
- if !vars.is_empty() {
- let mut related_var = Vec::new();
- for (name, compressed_data) in vars {
- let re = KeyValueType {
- name: name,
- description: compressed_data,
- };
- related_var.push(re);
- }
- compress_func.related_var = Some(related_var);
- }
- if !inputs.is_empty() {
- let mut params = Vec::new();
- for (name, compressed_data) in inputs {
- let re = KeyValueType {
- name: name,
- description: compressed_data,
- };
- params.push(re);
- }
- compress_func.params = Some(params);
- }
- if !outputs.is_empty() {
- let mut results = Vec::new();
- for (name, compressed_data) in outputs {
- let re = KeyValueType {
- name: name,
- description: compressed_data,
- };
- results.push(re);
- }
- compress_func.results = Some(results);
- }
- if let Some(r) = receiver {
- compress_func.receiver = Some(r);
- }
- let to_compress_str = serde_json::to_string(&compress_func).unwrap();
- let compress_func_enum = ToCompress::ToCompressFunc(to_compress_str);
- let compress_data = compress(&compress_func_enum).await;
- Option::from(compress_data)
-}
-
-// depends on the compressed info of methods, so call llm_compress_func first.
-async fn llm_compress_type(
- func: &str,
- extra_type: HashMap,
- related_methods: HashMap,
-) -> Option {
- let mut compress_type = ToCompressType {
- content: func.to_string(),
- related_methods: None,
- related_types: None,
- };
- if !extra_type.is_empty() {
- let mut r_type = Vec::new();
- for (name, compressed_data) in extra_type {
- let re = KeyValueType {
- name,
- description: compressed_data,
- };
- r_type.push(re);
- }
- compress_type.related_types = Some(r_type);
- }
-
- if !related_methods.is_empty() {
- let mut r_methods = Vec::new();
- for (name, compressed_data) in related_methods {
- let re = KeyValueType {
- name,
- description: compressed_data,
- };
- r_methods.push(re);
- }
- compress_type.related_methods = Some(r_methods);
- }
-
- let to_compress_str = serde_json::to_string(&compress_type).unwrap();
- let compress_type_enum = ToCompress::ToCompressType(to_compress_str);
- let compress_data = compress(&compress_type_enum).await;
- Option::from(compress_data)
-}
-
-fn pkg_name_to_repo_name(s: &str) -> String {
- let parts: Vec<_> = s.split('/').collect();
-
- match parts.len() {
- 0 | 1 | 2 => String::from(s),
- _ => parts[1..3].join("/"),
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_pkg_name_to_repo_name() {
- assert_eq!(pkg_name_to_repo_name("a/b/c"), "b/c");
- assert_eq!(pkg_name_to_repo_name("a/b/c/d"), "b/c");
- assert_eq!(pkg_name_to_repo_name("rust"), "rust");
- }
-}
diff --git a/src/compress/llm/coze.rs b/src/compress/llm/coze.rs
deleted file mode 100644
index 363fb32e..00000000
--- a/src/compress/llm/coze.rs
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use core::panic;
-use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, CONNECTION, CONTENT_TYPE, HOST};
-use serde::{Deserialize, Serialize};
-use serde_json::{from_str, to_string, to_string_pretty};
-use std::ops::Add;
-use std::{collections::HashMap, time::Duration};
-
-use crate::{compress::llm::prompts::make_compress_prompt, config::CONFIG};
-
-use super::ToCompress;
-
-#[derive(Serialize, Deserialize, Debug)]
-struct Message {
- role: String,
- r#type: String,
- content: String,
- content_type: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct StreamResponse {
- event: String,
- message: Message,
- conversation_id: String,
- index: i32,
- is_finish: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct Response {
- messages: Vec,
- conversation_id: String,
- code: i64,
- msg: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct BotQuery {
- bot_id: String,
- user: String,
- query: String,
- stream: bool,
-}
-
-fn panic_error(msg: &str, res: &reqwest::Response) {
- let headers: HashMap = res
- .headers()
- .iter()
- .map(|(k, v)| (k.to_string(), String::from(v.to_str().unwrap_or_default())))
- .collect();
- panic!(
- "{}.\nstatus code is {}\nheader is {:?}\n",
- msg,
- res.status(),
- headers,
- );
-}
-
-fn make_lang_prompt() -> &'static str {
- match &CONFIG.language {
- crate::config::Language::Chinese => "现在,请使用中文解释以下输入:\n",
- crate::config::Language::English => {
- "Now, please use English to explain the following input:\n"
- }
- }
-}
-
-pub async fn coze_compress(to_compress: ToCompress) -> String {
- let mut headers = HeaderMap::new();
-
- let auth = format!("Bearer {}", CONFIG.coze_api_token.as_ref().unwrap());
- headers.insert(
- "Authorization",
- HeaderValue::from_str(auth.as_str()).unwrap(),
- );
- headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
- headers.insert(ACCEPT, HeaderValue::from_static("*/*"));
- headers.insert(CONNECTION, HeaderValue::from_static("keep-alive"));
-
- let client = reqwest::Client::new();
-
- let bot_id = CONFIG.coze_bot_id.as_ref().unwrap().clone();
- let to_compress_str = make_compress_prompt(&to_compress);
- let bot_query = BotQuery {
- bot_id: bot_id.to_string(),
- user: "welkey".to_string(),
- query: to_compress_str.to_string(),
- stream: true,
- };
-
- println!(
- "[coze_compress] request: {}",
- to_string(&bot_query).unwrap()
- );
-
- let rb = client
- .post("https://api.coze.com/open_api/v2/chat")
- .headers(headers)
- .json(&bot_query);
-
- let mut res = match rb.send().await {
- Ok(r) => r,
- Err(e) => {
- panic!("http request faield: {:?}", e);
- }
- };
- let status = res.status();
- if status != 200 {
- panic_error("coze request failed.", &res);
- }
-
- // streaming repsonse
- // allocate bytes buffer
- let mut sse_body = Vec::new();
- loop {
- match res.chunk().await {
- Ok(c) => {
- if let Some(chunk) = c {
- // println!("chunk: {:?}", std::str::from_utf8(&chunk).unwrap());
- sse_body.extend_from_slice(&chunk);
- } else {
- break;
- }
- }
- Err(e) => {
- panic_error(format!("coze request failed: {:?}", e).as_str(), &res);
- }
- }
- }
-
- let sse_body = std::str::from_utf8(&sse_body).unwrap();
- let mut output: String = String::new();
-
- // handle SSE datas
- for line in sse_body.lines() {
- // println!("[coze_compress] receive chunk: {}", line);
-
- if line.len() == 0 || !line.starts_with("data:") {
- continue;
- }
- let data = line.strip_prefix("data:").unwrap().trim();
- if data.len() == 0 {
- continue;
- }
- let response: StreamResponse =
- from_str(data).expect(format!("{} is not a valid json", data).as_str());
-
- if response.is_finish || response.event != "message" {
- break;
- }
-
- if &response.message.r#type != "answer" {
- continue;
- }
- output += &response.message.content;
- }
-
- println!("[coze_compress] response body: {}", output);
-
- // unary response
- // let body = res.bytes().await.unwrap();
- // let resp: Response = from_str(std::str::from_utf8(&body).unwrap()).unwrap();
- // if resp.code != 0 {
- // panic!("code is {}, msg is {}", resp.code, resp.msg);
- // }
- // for message in resp.messages {
- // output += &message.content;
- // }
-
- output
-}
diff --git a/src/compress/llm/maas.rs b/src/compress/llm/maas.rs
deleted file mode 100644
index 5e38dd4d..00000000
--- a/src/compress/llm/maas.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::{path::Path, process::Command, time::SystemTime};
-
-use crate::config::CONFIG;
-
-use super::{prompts::make_compress_prompt, ToCompress};
-
-pub fn maas_compress_py(to_compress: &ToCompress, model_name: &str) -> String {
- let message = make_compress_prompt(to_compress);
- use std::io::Write;
- use std::process::{Command, Stdio};
-
- let command = "python3";
- let path = format!("{}/maas/model_call.py", &CONFIG.work_dir);
- println!("executing command: {} {} {}", command, path, model_name);
-
- // Create a new command process with piped stdin
- let mut child = Command::new(command)
- .arg(path)
- .arg(model_name)
- .stdin(Stdio::piped())
- .stdout(Stdio::piped())
- .spawn()
- .expect("Failed to spawn command");
-
- // Write the message to the child process's stdin
- if let Some(mut stdin) = child.stdin.take() {
- stdin
- .write_all(message.as_bytes())
- .expect("Failed to write to stdin");
- }
-
- // Wait for the child process to complete
- let output = child.wait_with_output().expect("Failed to read stdout");
- // Check if the command was successful
- if output.status.success() {
- // Print the standard output
- let stdout = String::from_utf8_lossy(&output.stdout).to_string();
- println!("stdout: {}", stdout);
- return stdout;
- } else {
- // Print the standard error
- let stderr = String::from_utf8_lossy(&output.stderr);
- panic!("Error: {}", stderr);
- }
-}
-
-pub async fn maas_compress_http(to_compress: &ToCompress, model_name: &str, url: &str) -> String {
- let message = make_compress_prompt(to_compress);
- let url = format!("http://{}", url);
- let client = reqwest::Client::new();
- let response = client
- .post(&url)
- .json(&serde_json::json!({"model": model_name, "prompt": message}))
- .send()
- .await
- .expect("Failed to send request");
-
- if response.status().is_success() {
- let body = response.text().await.expect("Failed to read response body");
- // get json key "ans"
- let v: serde_json::Value = serde_json::from_str(&body).expect("Failed to parse response");
- if let Some(ans) = v.get("ans") {
- return ans.as_str().unwrap().to_string();
- } else {
- panic!("Failed to get key 'ans' from response");
- }
- } else {
- panic!("Failed to read response body");
- }
-}
diff --git a/src/compress/llm/mod.rs b/src/compress/llm/mod.rs
deleted file mode 100644
index c7b64a84..00000000
--- a/src/compress/llm/mod.rs
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use crate::config::CONFIG;
-pub mod coze;
-pub mod maas;
-pub mod ollama;
-pub mod prompts;
-
-#[derive(Clone, Debug)]
-pub enum ToCompress {
- ToCompressFunc(String),
- ToCompressType(String),
- ToCompressVar(String),
- ToCompressPkg(String),
- ToCompressModule(String),
- ToMergeRustPkg(String),
- ToValidateRust(String),
-}
-
-pub async fn compress(to_compress: &ToCompress) -> String {
- match CONFIG.api_type.as_str() {
- "coze" => coze::coze_compress(to_compress.clone()).await,
- "maas" => {
- if &CONFIG.mass_http_url == "" {
- maas::maas_compress_py(to_compress, CONFIG.maas_model_name.as_str())
- } else {
- maas::maas_compress_http(
- to_compress,
- CONFIG.maas_model_name.as_str(),
- &CONFIG.mass_http_url,
- )
- .await
- }
- }
- "ollama" => ollama::ollama_compress(to_compress.clone()).await,
- _ => panic!("Unknown API type {}", CONFIG.api_type),
- }
-}
diff --git a/src/compress/llm/ollama.rs b/src/compress/llm/ollama.rs
deleted file mode 100644
index 0b7cd07f..00000000
--- a/src/compress/llm/ollama.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use serde::{Deserialize, Serialize};
-
-use crate::{compress::llm::prompts::make_compress_prompt, config::CONFIG};
-
-use super::ToCompress;
-
-pub async fn ollama_compress(to_compress: ToCompress) -> String {
- let request_url = format!("http://localhost:11434/api/generate");
- let to_compress_str = make_compress_prompt(&to_compress);
- let model_name = CONFIG.ollama_model.as_ref().unwrap().clone();
-
- println!("use prompt:\n{}", to_compress_str);
- let req_body: OllamaReq = OllamaReq {
- model: model_name,
- prompt: to_compress_str,
- };
- let client = reqwest::Client::new();
- let mut response = client
- .post(&request_url)
- .json(&req_body)
- .send()
- .await
- .unwrap();
-
- let mut output = String::new();
- while let Ok(Some(chunk)) = response.chunk().await {
- let result = serde_json::from_slice(&chunk);
- if result.is_err() {
- break;
- }
-
- let value: OllamaResp = result.unwrap();
-
- if !value.response.is_empty() {
- output.push_str(value.response.as_str());
- }
-
- if value.done {
- break;
- }
- }
-
- output
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct OllamaReq {
- model: String,
- prompt: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct OllamaResp {
- model: String,
- created_at: String,
- response: String,
- done: bool,
-}
diff --git a/src/compress/llm/prompts.rs b/src/compress/llm/prompts.rs
deleted file mode 100644
index 3c439832..00000000
--- a/src/compress/llm/prompts.rs
+++ /dev/null
@@ -1,781 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use crate::config::{Language, CONFIG};
-
-use super::ToCompress;
-use paste::paste;
-
-// choose the prompt based on the language
-macro_rules! choose_prompt_lang {
- ($prompt: ident) => {
- // return const {{$prompt}}_ZH or {{$prompt}}_EN
- match &CONFIG.language {
- Language::Chinese => paste! { [<$prompt _ZH>] },
- Language::English => paste! { [<$prompt _EN>] },
- }
- };
-}
-
-pub fn make_compress_prompt(to_compress: &ToCompress) -> String {
- match to_compress {
- ToCompress::ToCompressModule(f) => {
- choose_prompt_lang!(PROMPT_COMPRESS_MOD).replace("{{DATA}}", f)
- }
- ToCompress::ToCompressType(t) => {
- choose_prompt_lang!(PROMPT_COMPRESS_TYPE).replace("{{DATA}}", t)
- }
- ToCompress::ToCompressFunc(f) => {
- choose_prompt_lang!(PROMPT_COMPRESS_FUNC).replace("{{DATA}}", f)
- }
- ToCompress::ToCompressVar(v) => {
- choose_prompt_lang!(PROMPT_COMPRESS_VAR).replace("{{DATA}}", v)
- }
- ToCompress::ToCompressPkg(p) => {
- choose_prompt_lang!(PROMPT_COMPRESS_PKG).replace("{{DATA}}", p)
- }
- ToCompress::ToMergeRustPkg(p) => todo!(),
- ToCompress::ToValidateRust(p) => todo!(),
- }
-}
-
-const PROMPT_COMPRESS_TYPE_ZH: &str = r##"
-# 角色
-你是一名熟练的程序员,你擅长阅读理解并总结概括代码。你的目标是通过使这些API更易理解,帮助对这些API了解较少的工程师。
-
-# 提示
-
-## 输入格式(JSON)
-包含 一个具体的类型定义 及 其依赖的其他语言符号描述:
-- "Content": 类型定义, 格式为字符串
-- "Related_methods": 格式为数组。其中每个对象表示此类型上定义的相关方法:
- - "CallName": 方法名,格式为字符串
- - "Description": 该方法的代码或总结
-- "Related_types": 格式为数组。该数组中的每个对象表示在该类型定义中依赖的其他类型:
- - "CallName": 类型名,格式为字符串
- - "Description": 该类型的代码或总结,格式为字符串
-
-## 输出格式(text)
-直接输出总结内容。不要输出JSON(IMPORTANT)!
-
-## 总结内容
-- 该类型的主要功能和用途
-- 每个字段的意义(如果有)
-
-# 约束
-- 禁止谈论JSON的格式,不允许披露原始JSON字符串或其中的任何片段。
-- 你的总结应不包含任何代码,也不应该包括“Related_methods”和“Related_types”中的任何信息。
--直接以总结类型名称开始你的回答。
-- (IMPORTANT)输出内容必须符合500字符限制。
-
-# 具体例子
-
-## 输入
-{
- "Content": "type shard struct {\n\tlock sync.RWMutex\n\tm map[SessionID]Session\n}",
- "Related_methods": [
- {
- "Name": "Store",
- "Description": "Store用于在存储具有特定ID的会话之前锁定分片,然后在操作完成后解锁。"
- },
- {
- "Name": "Delete",
- "Description": "Delete用于从分片中删除通过其ID标识的会话,通过在删除之前锁定和之后解锁来确保线程安全。"
- },
- {
- "Name": "Load",
- "Description": "Load函数用于使用给定的会话ID从分片中检索会话,并返回会话以及一个指示会话是否找到的布尔值。"
- }
- ],
- "Related_types": [
- {
- "Name": "Session",
- "Description": "定义的类型是一个名为“Session”的接口,它概述了会话管理的结构。它包括三个方法:“IsValid”检查会话当前是否有效并返回布尔值;“Get”检索指定键的值,键和值都是interface类型,允许使用不同类型的值和键;“WithValue”设置特定键的值并返回反映此更改的新会话实例,有效地允许会话在保持不变性的同时进行修改。此接口没有相关的方法或类型。"
- },
- {
- "Name": "SessionID",
- "Description": "名为SessionID的类型定义为无符号的64位整数。此类型没有指定的相关方法或相关类型。"
- }
- ]
-}
-
-## 输出
-shard类型是一个结构体,包含一个读写锁和一个将会话ID与会话关联的映射,用于高效的会话管理。它包括如下字段:
- - Session:定义了一个用于管理会话的结构的接口。它允许检查会话是否有效,检索和设置会话中的键值。
- - SessionID:定义为无符号的64位整数,这种类型作为会话的唯一标识符。
-
-
-# 现在,请开始处理如下输入:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_TYPE_EN: &str = r##"
-# Character
-You are a skilled programmer and you are good at reading, understanding and summarizing code. Your goal is to help engineers who know less about these apis by making them easier to understand.
-
-# Tips
-
-## Input format (JSON)
-Contains a specific type definition and descriptions of other language symbols that depend on it:
-- "Content": indicates the type definition. The format is a string
-- "Related_methods": The format is an array. Where each object represents a related method defined on this type:
- - "CallName": indicates the method name. The format is a string
- - "Description": indicates the code or summary of the method
-- "Related_types": The format is an array. Each object in this array represents other types that are dependent in that type definition:
- - "CallName": Type name. The format is a string
- - "Description": indicates the code or summary of the type. The format is a string
-
-## Output format (text)
-Output summary content directly. Do not output JSON (IMPORTANT)!
-
-## Summarize the content
-- The main functions and uses of this type
-- The meaning of each field (if any)
-
-# Constraint
-- It is forbidden to talk about the format of JSON, and it is not allowed to disclose the original JSON string or any fragment of it.
-Your summary should not contain any code, nor should it include any information from "Related_methods" and "Related_types."
-- Start your answer directly with the summary type name.
-- (IMPORTANT) The output must meet the 500-character limit.
-
-
-# Specific examples
-
-## Input
-{
- "Content": "type shard struct {\n\tlock sync.RWMutex\n\tm map[SessionID]Session\n}",
- "Related_methods": [
- {
- "Name": "Store",
- "Description": "Store is used to lock the shard before storing the session with a specific ID, and then unlock it after the operation is complete."
- },
- {
- "Name": "Delete",
- "Description": "Delete is used to remove the session identified by its ID from the shard, ensuring thread safety by locking before deletion and unlocking after."
- },
- {
- "Name": "Load",
- "Description": "Load function is used to retrieve the session from the shard using the given session ID, and returns the session along with a boolean value indicating whether the session is found."
- }
- ],
- "Related_types": [
- {
- "Name": "Session",
- "Description": "The defined type is an interface named 'Session' that outlines the structure of session management. It includes three methods: 'IsValid' checks if the session is currently valid and returns a boolean value; 'Get' retrieves the value of a specified key, both key and value are of type interface, allowing values and keys of different types to be used; 'WithValue' sets the value of a specific key and returns a new session instance reflecting this change, effectively allowing the session to be modified while maintaining immutability. This interface has no related methods or types."
- },
- {
- "Name": "SessionID",
- "Description": "The type named SessionID is defined as an unsigned 64-bit integer. This type has no specified related methods or related types."
- }
- ]
-}
-
-## Output
-The shard type is a structure that contains a read/write lock and a mapping that associates the session ID with the session for efficient session management. It includes the following fields:
-- Session: Defines an interface for managing session structures. It allows you to check whether the session is valid, retrieve and set key values in the session.
-- SessionID: Defined as an unsigned 64-bit integer. This type serves as the unique identifier of a session.
-
-
-# Now, please summarize below input:
-
-{{DATA}}
-"##;
-
-const PROMPT_COMPRESS_FUNC_ZH: &str = r##"
-# 角色
-你是一名熟练的程序员,你擅长阅读理解并总结概括代码。你的目标是通过使这些API更易理解,帮助对这些API了解较少的工程师。
-
-# 提示
-
-## 输入的JSON格式
-包含 一个具体的函数定义 及 其依赖的其他语言符号描述:
-- "Content": 类型定义, 格式为字符串
-- "Related_func": 格式为数组。其中每个对象表示函数体内用到的相关函数或方法:
- - "CallName": 在函数体内的调用名称(函数名或类型名.方法名),格式为字符串
- - "Description": 该函数的代码或总结
-- "Related_type": 格式为数组。该数组中的每个对象表示在该函数定义(出入参、函数体内)中依赖的其他类型:
- - "Name": 类型名称,格式为字符串
- - "Description": 该类型的代码或总结,格式为字符串
-- "Related_var": 格式为数组。该数组中的每个对象表示在该函数体重中依赖的其他全局变量(或常量):
- - "Name": 变量名,格式为字符串
- - "Description": 该变量代码或总结,格式为字符串
-- "Receiver":(可选) 字符串。表示方法的接收者。不存在该字段表示该实体为函数
-- "Params":(可选) 格式为数组。每个对象表示函数的参数:
- - "Name": 类型名,格式为字符串
- - "Description": 参数的代码或总结,格式为字符串
-- "Results":(可选) 格式为数组。每个对象表示函数的返回值:
- - "Name": 类型名,格式为字符串
- - "Description": 返回值的代码或总结,格式为字符串
-
-## 输出格式(text)
-直接输出总结内容。不要输出JSON(IMPORTANT)!
-
-## 总结内容
-- 该函数的主要功能和用途
-- 该函数的每个参数的意义(如果有)
-
-# 约束
-- 你的总结应严格关于函数本身,禁止谈论JSON的格式,不允许披露原始JSON字符串或其中的任何片段。
-- 你的总结应不包含任何代码。
-- 直接以总结类型名称开始你的回答。
-- (IMPORTANT)输出内容必须符合500字符限制。
-
-# 具体例子
-
-### 输入
-{
- "Content": "func (self SessionCtx) Get(key interface{}) interface{} {\n\treturn self.storage.Value(key)\n}",
- "Receiver": [
- {
- "Name": "SessionCtx",
- "Description": "SessionCtx类型是一个结构体,主要用于管理会话上下文。它包含以下字段:\n- enabled:一个指向atomic.Value的指针,用于指示会话上下文是否启用。\n- storage:一个context.Context类型的值,用于存储与会话相关的上下文信息。\nSessionCtx类型的设计目的是提供一种安全且高效的方式来管理和访问会话上下文,其方法包括禁用会话、检验会话有效性、设置和获取上下文中的键值对等。"
- }
- ],
- "Params": [
- {
- "Name": "key",
- "Description": "inteface{}"
- }
- ],
- "Results": [
- {
- "Name": "interface{}"
- "Description": "interface{}"
- }
- ],
- "Related_var": null,
- "Related_func: [
- {
- "CallName": "Value",
- "Description": "Value函数用于从存储中检索与给定键相关联的值。"
- }
- ],
-}
-
-### 输出
-Get函数用于从SessionCtx的存储中检索与给定键相关联的值。
-入参:
-- key: 给定的键
-出参:
-- 会话中存储的对应的值
-
-
-# 现在,请开始处理如下输入:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_FUNC_EN: &str = r##"
-# Character
-You are a skilled programmer and you are good at reading, understanding and summarizing code. Your goal is to help engineers who know less about these apis by making them easier to understand.
-
-# Tips
-
-## Input JSON format
-Contains a specific function definition and descriptions of other language symbols on which it depends:
-- "Content": indicates the type definition. The format is a string
-- "Related_func": The format is an array. Where each object represents a related function or method used in the body of the function:
- - "CallName": specifies the callee name in the function body (func_name or type.method_name). The format is a string
- - "Description": indicates the code or summary of the function
-- "Related_type": The format is an array. Each object in this array represents other types that are dependent in the function definition (input/exit parameters, function body) :
- - "Name": indicates the type name. The format is a string
- - "Description": indicates the code or summary of the type. The format is a string
-- "Related_var": The format is an array. Each object in this array represents other global variables (or constants) that are dependent in the weight of this function:
- - "Name": indicates the name of the variable. The format is a string
- - "Description": indicates the code or summary of the variable. The format is a string
-- "Receiver": (optional) string. Indicates the receiver of the method. The absence of this field indicates that the entity is a function
-- "Params": (optional) The format is an array. Each object represents the parameters of the function:
- - "Name": indicates the parameter type name. The format is a string
- - "Description": indicates the code or summary of the parameter. The format is a string
-- "Results": (optional) The format is an array. Each object represents the return value of the function:
- - "Name": indicates the result type name. The format is a string
- - "Description": indicates the code or summary of the result. The format is a string
-
-## Output format (text)
-Output summary content directly. Do not output JSON (IMPORTANT)!
-
-## Summarize the content
-- The main function and purpose of the function
-- The meaning of each parameter of the function (if any)
-
-# Constraint
-Your summary should be strictly about the function itself, do not talk about the format of JSON, and do not allow the disclosure of the original JSON string or any fragments of it.
-- Your summary should not contain any code.
-- Start your answer directly with the summary type name.
-- (IMPORTANT) The output must meet the 500-character limit.
-
-
-# Specific examples
-
-### Type
-{
- "Content": "func (self SessionCtx) Get(key interface{}) interface{} {\n\treturn self.storage.Value(key)\n}",
- "Receiver": [
- {
- "Name": "SessionCtx",
- "Description": "SessionCtx is a struct type that is mainly used to manage session contexts. It includes the following fields:\n- enabled: a pointer to atomic.Value that indicates whether the session context is enabled.\n- storage: a value of type context.Context used to store context information related to the session.\nThe design purpose of the SessionCtx type is to provide a safe and efficient way to manage and access session contexts, and its methods include disabling sessions, checking session validity, setting and getting key-value pairs in the context, etc."
- }
- ],
- "Params": [
- {
- "Name": "interface{}",
- "Description": "inteface{}"
- }
- ],
- "Results": [
- {
- "Name": "interface{}"
- "Description": "interface{}"
- }
- ],
- "Related_var": null,
- "Related_func: [
- {
- "CallName": "Value",
- "Description": "Value function is used to retrieve the value associated with a given key from the store."
- }
- ],
-}
-
-### Output
-The Get function is used to retrieve the value associated with a given key from SessionCtx's store.
-Entry:
-- key: indicates the specified key
-Input:
-- The corresponding value stored in the session
-
-
-# Now, please summarize below input:
-
-{{DATA}}
-"##;
-
-const PROMPT_COMPRESS_VAR_ZH: &str = r##"
-# 角色
-你是一名熟练的程序员,你擅长阅读理解并总结概括代码。你的目标是通过使这些API更易理解,帮助对这些API了解较少的工程师。
-
-# 提示
-
-## 输入格式(JSON)
-包含一个 全局变量(或常量)的定义 和 引用它的其它语言节点
-- "Content": 该变量定义,格式为字符串
-- "References": 格式为数组。该数组中的每个对象表示一个引用该变量的节点:
- - 改节点具体代码,格式为字符串
-- "Type":(optional) 该变量类型的总结,格式为字符串。简单类型没有该字段。
-
-## 输出格式(text)
-直接输出总结内容。不要输出JSON(IMPORTANT)!
-
-## 总结内容
-- 该变量的主要功能和用途
-- 该变量关联的主要函数或类型(如果有)
-
-
-# 约束
-- 专注于总结lang包的基本功能,避免深入具体实现细节。
-- 编写简短且易于理解的总结,供其他工程师参考。
-- 保持与提供的输入数据一致的技术术语。
-- 输出字符限制为500字符。
-
-# 示例
-
-## 输入
-{
- "Content": "var bufferSizeLimit Integer = 1024",
- "Type": "Integer是int整型的别名"
- "Reference": [
- "func MakeBuffer(size int) []byte { if size > bufferSizeLimit { panic("over limit!") } return make([]byte, size) }",
- "func SetBufferSizeLimit(limit int) []byte { bufferSizeLimit = limit }"
- ]
-}
-
-## 输出
-bufferSizeLimit是一个整数变量,初始值为整型1024,用于限制缓冲区的大小。
-
-
-# 现在,请开始处理如下输入:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_VAR_EN: &str = r##"
-# Character
-You are a skilled programmer and you are good at reading, understanding and summarizing code. Your goal is to help engineers who know less about these apis by making them easier to understand.
-
-# Tips
-
-## Input format (JSON)
-Contains the definition of a global variable (or constant) and other language nodes that reference it
-- "Content": This variable is defined in the format of a string
-- "References": The format is an array. Each object in the array represents a node that references the variable:
-- Change the node code to a string format
-- "Type": (optional) Summary of the variable type. The format is a string. Simple types do not have this field.
-
-## Output format (text)
-Output summary content directly. Do not output JSON (IMPORTANT)!
-
-## Summarize the content
-- The main function and use of the variable
-- The main function or type associated with the variable (if any)
-
-
-# Constraint
-- Focus on summarizing the basic features of the lang package and avoid delving into specific implementation details.
-- Write short and easy to understand summaries for other engineers to refer to.
-- Technical terms that are consistent with the input data provided.
-- The output character limit is 500 characters.
-
-
-# Examples
-
-## Input
-{
- "Content": "var bufferSizeLimit Integer = 1024",
- "Type": "Integer is an alias for the int type"
- "Reference": [
- "func MakeBuffer(size int) []byte { if size > bufferSizeLimit { panic("over limit!") } return make([]byte, size) }",
- "func SetBufferSizeLimit(limit int) []byte { bufferSizeLimit = limit }"
- ]
-}
-
-## Output
-bufferSizeLimit is an integer variable with an initial value of integer 1024 that is used to limit the size of the buffer.
-
-
-# Now, please summarize below input:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_PKG_ZH: &str = r##"
-# 角色
-你是一名经验丰富的工程师,并深入了解其各种包。你的主要职责是利用其他开发人员提供的有关公共函数和类型的数据,简化并总结包的基本功能。你的目标是通过使这些包更易理解,帮助对这些包了解较少的工程师。
-
-# 提示
-
-## 输入格式(JSON)
-包含 一个具体的类型定义 及 其依赖的其他语言符号描述:
-- "PkgPath": 改包的import路径, 格式为字符串
-- "Functions": 格式为数组。其中每个对象表示此包内定义的公开函数和方法:
- - "Name": 函数名,格式为字符串
- - "Description": 该方法的代码或总结, 格式为字符串
-- "Types": 格式为数组。该数组中的每个对象表示此包内定义的公开类型:
- - "Name": 使用该类型的名称,格式为字符串
- - "Description": 该类型的代码或总结,格式为字符串
-- "Variables": 格式为数组。该数组中的每个对象表示在该包中定义的全局变量(或常量):
- - "Name": 变量名,格式为字符串
- - "Description": 该变量代码或总结,格式为字符串
-
-## 输出格式(text)
-直接输出总结内容。不要输出JSON(IMPORTANT)!
-
-## 总结内容
-- 该包的主要功能和用途
-- 该包的一些关键函数和类型的描述
-
-
-# 约束
-- 专注于总结lang包的基本功能,避免深入具体实现细节。
-- 编写简短且易于理解的总结,供其他工程师参考。
-- 保持与提供的输入数据一致的技术术语。
-- 输出字符限制为2000字符。
-
-# 示例
-
-## 输入
-{
- "PkgPath": "github.com/cloudwego/localsession/backup",
- "Functions": [
- {
- "Description": "BackupCtx用于创建一个新的会话上下文,将会话标记为启用,并将其与给定的上下文关联。然后检查是否存在默认会话管理器,如果存在,则将创建的会话绑定到会话管理器。此过程包括根据会话ID和分片编号确定会话的适当分片,将会话存储在该分片中,并在会话管理器的选项启用时异步传输会话ID。它还涉及创建或更新标签映射以确保会话ID标签的唯一性,并使用新或修改的映射更新Pprof标签。此外,通过在存储会话之前锁定分片并在存储后解锁来保证线程安全。",
- "Name": "BackupCtx",
- },
- {
- "Description": "RecoverCtxOnDemands用于根据需求使用备份处理程序恢复或更新上下文。它首先检查处理程序是否为nil,如果是,则返回未更改的当前上下文。该函数尝试使用CurSession检索当前会话,CurSession确定是否存在默认会话管理器并获取当前会话。如果未找到当前会话或其类型为SessionCtx,则函数返回未更改的上下文。然后使用Export方法从SessionCtx实例中检索存储并将其作为上下文返回。此存储与当前上下文一起传递给用户定义的处理程序,以可能生成新上下文并决定是否需要备份操作。如果不需要备份,则返回原始上下文。如果在预上下文中存在持久化的元信息值,函数将这些值合并到新或现有上下文中,优先考虑传入上下文而不是会话数据。这种双向合并确保所有持久化的元信息从先前的上下文传递到新或现有的上下文中。",
- "Name": "RecoverCtxOnDemands",
- },
- {
- "Description": "DefaultOptions用于初始化具有默认值的Options结构,包括将Enable设置为false,并使用DefaultManagerOptions方法初始化ManagerOptions,分片数为100,垃圾收集间隔为10分钟,并禁用隐式异步传输。",
- "Name": "DefaultOptions",
- },
- {
- "Description": "ClearCtx用于从默认会话管理器的分片中删除指定的会话ID(如果存在),通过检查当前例程的ID,根据该ID确定相关分片,并在找到时安全地删除会话。这还包括在启用跟踪时从Pprof标签中清除会话ID。",
- "Name": "ClearCtx",
- },
- {
- "Description": "Init用于在启用选项时初始化会话管理器。它通过基于环境变量配置管理器的选项,终止任何现有会话以防止重叠,并创建一个新的SessionManager实例来实现。这包括初始化分片,确保分片数大于零,并在必要时启动垃圾收集以确保高效的会话管理并防止并发执行。",
- "Name": "Init",
- }
- ],
- "Types": [
- {
- "Description": "\"Options\"结构包括一个名为\"Enable\"的布尔字段,并嵌入了本地会话包中的\"ManagerOptions\"。\"ManagerOptions\"专门用于指示会话管理的细节和goroutines的行为。它包括三个重要字段:\"EnableImplicitlyTransmitAsync\"、\"ShardNumber\"和\"GCInterval\"。\"EnableImplicitlyTransmitAsync\"字段是一个布尔值,便于将当前会话无缝传输到子goroutines,尽管它需要与`pprof.Do()`进行精确互动以正确操作。\"ShardNumber\"是一个整数,影响会话ID的分布,要求值大于零。\"GCInterval\"确定SessionManager中的垃圾收集频率,持续时间大于一秒钟会激活垃圾收集,值为零则关闭垃圾收集。此结构没有任何相关方法或类型。",
- "Name": "Options",
- },
- {
- "Description": "类型`BackupHandler`是一个函数类型,接受两个参数,都是`context.Context`类型,分别表示先前和当前的上下文。它返回两个值:一个新的`context.Context`类型的上下文和一个布尔值,指示是否需要备份。此类型没有相关的方法或类型。",
- "Name": "BackupHandler",
- }
- ],
- "Variables": [
- {
- "Description": "DefaultManager是一个全局变量,用于存储默认的会话管理器。它是一个指向`SessionManager`类型的指针,用于管理会话的创建、备份、恢复和清除。此变量没有相关的方法或类型。",
- "Name": "DefaultManager",
- }
- ]
-}
-
-## 输出
-此包位于github.com/cloudwego/localsession/backup,为应用中的会话管理提供工具,特别关注会话上下文的备份和恢复机制。它包括创建、备份、恢复和清除会话上下文的功能,以及使用默认或自定义设置初始化会话管理。
-关键函数:
- - BackupCtx(ctx context.Context): 创建一个新的会话上下文,使其启用并将其与给定的上下文关联。它确保线程安全,并在配置时异步传输会话ID。
- - RecoverCtxOnDemands(ctx context.Context, handler BackupHandler) context.Context: 使用备份处理程序根据需求恢复或更新上下文,能够合并先前上下文中的持久化元信息。
- - Init(opts Options): 使用指定的选项初始化会话管理器,确保高效的会话管理和垃圾收集。
-关键类型:
- - Options: 一个结构体,包括会话管理的设置,例如启用隐式异步传输、分片数量和垃圾收集间隔。
- - BackupHandler: 一种函数类型,接受两个上下文(先前和当前),返回一个新的上下文和一个布尔值,指示是否需要备份。
-此包旨在通过提供强大的会话备份和恢复机制、自定义的会话管理选项,并确保线程安全和高效的资源管理,增强应用中的会话管理。
-关键全局变量:
- - DefaultManager: 一个指向SessionManager类型的指针,用于管理会话的创建、备份、恢复和清除。
-
-
-# 现在,请开始处理如下输入:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_PKG_EN: &str = r##"
-# Character
-You are an experienced engineer who specializes in lang and has in-depth knowledge of its various packages. Your primary responsibility is to simplify and summarize the basic functionality of the lang package using data provided by other developers about common functions and types. Your goal is to help engineers who know less about these packages by making them easier to understand.
-
-# Tips
-
-## Input format (JSON)
-Contains a specific type definition and descriptions of other language symbols that depend on it:
-- "PkgPath": indicates the import path of the package. The format is a string
-- "Functions": The format is an array. Where each object represents the public functions and methods defined within this package:
-- "Name": indicates the function name. The format is a string
-- "Description": indicates the code or summary of the method. The format is a string
-- "Types": The format is an array. Each object in this array represents a public type defined within this package:
-- "Name": specifies the name of the type. The format is a string
-- "Description": indicates the code or summary of the type. The format is a string
-- "Variables": Format is an array. Each object in this array represents a global variable (or constant) defined in that package:
-- "Name": indicates the name of the variable. The format is a string
-- "Description": indicates the code or summary of the variable. The format is a string
-
-## Output format (text)
-Output summary content directly. Do not output JSON (IMPORTANT)!
-
-## Summarize the content
-- The main functions and uses of the package
-- Description of some of the key functions and types of the package
-
-
-# Constraint
-- Focus on summarizing the basic features of the lang package and avoid delving into specific implementation details.
-- Write short and easy to understand summaries for other engineers to refer to.
-- Technical terms that are consistent with the input data provided.
-- The output character limit is 2000 characters.
-
-# Examples
-
-## Input
-{
- "PkgPath": "github.com/cloudwego/localsession/backup",
- "Functions": [
- {
- "Description": "BackupCtx is used to create a new session context, mark the session as enabled, and associate it with the given context. It ensures thread safety",
- "Name": "BackupCtx",
- },
- {
- "Description": "RecoverCtxOnDemands is used to recover or update the context on demand using a backup handler, allowing you to merge persistent meta-information from previous contexts.",
- "Name": "RecoverCtxOnDemands",
- },
- {
- "Description": "DefaultOptions is used to initialize the Options structure with default values, including setting Enable to false and initializing ManagerOptions with DefaultManagerOptions method.",
- "Name": "DefaultOptions",
- },
- {
- "Description": "ClearCtx is used to remove the specified session ID from the shards of the default session manager, ensuring thread safety.",
- "Name": "ClearCtx",
- },
- {
- "Description": "Init is used to initialize the session manager when options are enabled.",
- "Name": "Init",
- }
- ],
- "Types": [
- {
- "Description": "Options structure includes a boolean field named \"Enable\" and embeds \"ManagerOptions\" from the local session package.",
- "Name": "Options",
- },
- {
- "Description": "BackupHandler type is a function type that accepts two contexts (previous and current) and returns a new context and a boolean value indicating whether a backup is required.",
- "Name": "BackupHandler",
- }
- ],
- "Variables": [
- {
- "Description": "DefaultManager is a global variable used to store the default session manager.",
- "Name": "DefaultManager",
- }
- ]
-}
-
-## Output
-This package is located in github.com/cloudwego/localsession/backup, to provide tools to application session management, pay special attention to the backup and restore mechanism of session context. It includes the ability to create, back up, restore, and clear session context, as well as initialize session management with default or custom Settings.
-Key functions:
-- BackupCtx(ctx context.context): Creates a new session context, enables it and associates it with the given Context. It ensures thread-safe and asynchronously transfers session ids when configured.
-- RecoverCtxOnDemands(ctx context.Context, handler BackupHandler) context.Context: Use a backup handler to restore or update the context on demand, allowing you to merge persistent meta-information from previous contexts.
-- Init(opts Options): Initializes the session manager with the specified options, ensuring efficient session management and garbage collection.
-Key types:
-- Options: A structure that includes Settings for session management, such as enabling implicit asynchronous transfer, number of shards, and garbage collection interval.
-- BackupHandler: A function type that accepts two contexts (previous and current) and returns a new context and a Boolean value indicating whether a backup is required.
-This package is designed to enhance session management in applications by providing a robust session backup and recovery mechanism, custom session management options, and ensuring thread-safe and efficient resource management.
-Key global variables:
-- DefaultManager: A pointer to the SessionManager type, which is used to manage session creation, backup, recovery, and clearing.
-
-
-# Now, please summarize below input:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_MOD_ZH: &str = r##"
-# 角色
-你是一名经验丰富的工程师,并深入了解其各种模块。你的主要职责是利用其他开发人员提供的有关公共函数和类型的数据,简化并总结模块的基本功能。你的目标是通过使这些包更易理解,帮助对这些包了解较少的工程师。
-
-# 提示
-
-## 输入格式(JSON)
-包含 一个具体的类型定义 及 其依赖的其他语言符号描述:
-- "Name": 模块名称
-- "Dir": 模块所处的相对仓库位置
- - "Description": 该方法的代码或总结, 格式为字符串
-- "Packages": 格式为数组。该数组中的每个对象表示此模块内每个子包描述:
- - "Name": 使用该包的名称,格式为字符串
- - "Description": 该包的总结,格式为字符串
-
-
-## 输出格式(text)
-直接输出总结内容。不要输出JSON(IMPORTANT)!
-
-## 总结内容
-- 该模块的主要功能和用途
-- 该模块的一些关键函数和类型的描述
-
-
-# 约束
-- 专注于高度总结模块的基本功能,避免深入具体实现细节。
-- 编写简短且易于理解的总结,供其他工程师参考。
-- 保持与提供的输入数据一致的技术术语。
-- 输出字符限制为2000字符。
-
-
-# 示例
-
-## 输入
-{
- "Name": "github.com/cloudwego/localsession",
- "Dir": ".",
- "Packages": [
- {
- "Description": "该包用于管理会话上下文,并定义了通用的Session接口",
- "Name": "github.com/cloudwego/localsession",
- },
- {
- "Description": "该包用于处理具体的上下文的metainfo等信息的兜底方式",
- "Name": "github.com/cloudwego/localsession/backup",
- }
- ]
-}
-
-## 输出
-此模块位于当前目录,为应用中的会话管理提供工具,特别关注会话上下文的备份和恢复机制。它包括创建、备份、恢复和清除会话上下文的功能,以及默认的上下文兜底方式。
-关键包:
- - github.com/cloudwego/localsession: 该包用于管理会话上下文,并定义了通用的Session接口
- - github.com/cloudwego/localsession/backup: 该包用于处理具体的上下文的metainfo等信息的兜底方式
-
-
-# 现在,请开始处理如下输入:
-
-{{DATA}}
-
-"##;
-
-const PROMPT_COMPRESS_MOD_EN: &str = r##"
-# Character
-You are an experienced engineer and have in-depth knowledge of its various modules. Your primary responsibility is to simplify and summarize the basic functionality of the module using data about common functions and types provided by other developers. Your goal is to help engineers who know less about these packages by making them easier to understand.
-
-# Tips
-
-## Input format (JSON)
-Contains a specific type definition and descriptions of other language symbols on which it depends:
-- Name: indicates the module name
-- "Dir": indicates the relative repository location of the module
-- "Description": indicates the code or summary of the method. The format is a string
-- "Packages": in array format. Each object in this array represents each subpackage description within this module:
-- "Name": indicates the name of the package. The format is a string
-- "Description": indicates the summary of the package. The format is a string
-
-
-## Output format (text)
-Output summary content directly. Do not output JSON (IMPORTANT)!
-
-## Summarize the content
-- Main functions and uses of the module
-- Description of some of the key functions and types of the module
-
-
-# Constraint
-- Focus on the basic functions of highly summarized modules and avoid delving into specific implementation details.
-- Write short and easy to understand summaries for other engineers to refer to.
-- Technical terms that are consistent with the input data provided.
-- The output character limit is 2000 characters.
-
-
-# Examples
-
-## Input
-{
- "Name": "github.com/cloudwego/localsession",
- "Dir": ".",
- "Packages": [
- {
- "Description": "This package manages the Session context and defines the generic session interface ",
- "Name": "github.com/cloudwego/localsession",
- },
- {
- "Description": "This package is used to handle the specific context of metainfo and other information in the back of the way ",
- "Name": "github.com/cloudwego/localsession/backup",
- }
- ]
-}
-
-## Output
-This module is located in the current directory and provides tools for session management in the application, with a special focus on backup and recovery mechanisms for session context. It includes the ability to create, back up, restore, and clear session context, as well as the default context bypass.
-Key package:
-- github.com/cloudwego/localsession: this package is used to manage Session context, and defines the general Session interface
-- github.com/cloudwego/localsession/backup: this package to deal with the specific context of the information such as the metainfo way out
-
-# Now, please summarize below input:
-{{DATA}}
-"##;
diff --git a/src/compress/mod.rs b/src/compress/mod.rs
deleted file mode 100644
index 220bb52c..00000000
--- a/src/compress/mod.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-pub mod compress;
-mod llm;
-pub mod parser;
-pub mod rust;
-pub mod types;
diff --git a/src/compress/parser.rs b/src/compress/parser.rs
deleted file mode 100644
index 7fa335e5..00000000
--- a/src/compress/parser.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use serde::{Deserialize, Serialize};
-
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Function {
- pub(crate) name: String,
- pub(crate) is_method: bool,
- pub(crate) is_third_party: bool,
- pub(crate) associated_struct: String,
- pub(crate) content: String,
- pub(crate) compressed_content: CompressedFunction,
- pub(crate) calls: Vec, // names of the functions this one calls
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct CompressedFunction {
- pub(crate) function_name: String,
- pub(crate) description: String,
- pub(crate) input: String,
- pub(crate) output: String,
- pub(crate) side_effect: String,
-}
-
-pub trait LanguageParser {
- fn new() -> Self;
- fn process(&mut self);
- fn summarize(&self, _function: &Function) -> String;
-}
\ No newline at end of file
diff --git a/src/compress/rust/mod.rs b/src/compress/rust/mod.rs
deleted file mode 100644
index 47e8e0d0..00000000
--- a/src/compress/rust/mod.rs
+++ /dev/null
@@ -1,242 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Copyright 2024 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::{cell::RefCell, collections::HashMap};
-
-pub fn convert_crate(path: &str) -> String {
- // extra last two parts
- let ps: Vec<&str> = path.split("/").collect();
- if ps.len() >= 2 {
- normalize_rust_import(format!("{}#{}", ps[ps.len() - 2], ps[ps.len() - 1]).as_str())
- } else {
- normalize_rust_import(path)
- }
-}
-
-pub fn normalize_rust_import(path: &str) -> String {
- let mut normalized = String::new();
- for ch in path.chars() {
- if !ch.is_ascii_alphabetic() && !ch.is_ascii_digit() && ch != '/' {
- normalized += "_";
- } else if ch == '/' {
- normalized.push_str("::");
- } else {
- normalized.push(ch);
- }
- }
- normalized.split("::").fold(String::new(), |acc, p| {
- if acc != "" {
- let p = if let Some(n) = avoid_rust_keywords(p) {
- n
- } else {
- p.to_string()
- };
- format!("{}::{}", acc, p)
- } else {
- p.to_string()
- }
- })
-}
-
-pub fn new_rust_impt(_crate: &str, path: &str) -> String {
- let p = if path != "" {
- normalize_rust_import(path)
- } else {
- "*".to_string()
- };
- return format!("{}::{}", normalize_rust_import(_crate), p);
-}
-
-static RUST_KEY_WORDS: [&str; 52] = [
- "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for",
- "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return",
- "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where",
- "while", "abstract", "alignof", "become", "box", "do", "final", "macro", "offsetof",
- "override", "priv", "proc", "pure", "sizeof", "typeof", "unsized", "virtual", "yield",
-];
-
-pub fn avoid_rust_keywords(word: &str) -> Option {
- if RUST_KEY_WORDS.contains(&word) {
- Some(format!("r#{}", word))
- } else {
- None
- }
-}
-
-// format and replace root
-pub fn replace_impt_crate(impt: &str, root: &Option, reop_id: &String) -> String {
- let impt = impt.strip_prefix("use ").unwrap();
- let mut impt = impt.replace(" ", "");
- // for path specify root, replace it
- if let Some(root) = root {
- if impt.starts_with(root) {
- // inside the root, replace it with crate
- impt = impt.replace(root, "crate")
- } else if impt.starts_with("crate::") {
- // outside the root, replace it with repo_id
- impt = impt.replace("crate", reop_id);
- }
- }
- format!("use {}", impt)
-}
-
-// rust error format:
-/*
-error: {{msg}}
- --> {{file:line:col}}
- |
-79 | xxxxxxxxxxxxx
- | ^^^^^^^^ help: xxxxx
- */
-pub fn extract_msg_from_err(err: &str, ignore: bool) -> HashMap<&str, String> {
- let mut files: HashMap<&str, String> = HashMap::new();
- let lines = err.split("\n").collect::>();
-
- let mut i = 0;
- 'out: while i < lines.len() {
- let line = lines[i];
- if line.contains("--> ") {
- let file = line
- .trim()
- .strip_prefix("--> ")
- .unwrap()
- .split(":")
- .nth(0)
- .unwrap();
- let msg = lines[i - 1];
- if ignore {
- // ignore specific errors
- for e in RUST_IGNORE_ERRS.iter() {
- if msg.contains(e) {
- i += 1;
- continue 'out;
- }
- }
- }
- let mut errs = vec![msg, line];
- for j in (i + 1)..lines.len() {
- let line = lines[j];
- if line.contains(" | ") {
- errs.push(line);
- } else {
- i = j;
- break;
- }
- }
- if let Some(old) = files.get_mut(file) {
- old.push_str("\n");
- old.push_str(&errs.join("\n"));
- } else {
- files.insert(file, errs.join("\n"));
- }
- }
- i += 1;
- }
- files
-}
-
-static RUST_IGNORE_ERRS: [&str; 5] = [
- "E0425", // not found in crate
- "E0412", // not found in crate
- "E0433", // use of undeclared crate or module
- "E0601", // consider adding a `main` function to
- "E0432", // unresolved import
-];
-
-#[derive(Debug, Clone)]
-pub struct Cargo {
- pub id: String,
- deps: RefCell>,
- bins: RefCell>,
-}
-
-impl Cargo {
- pub fn new(id: &str) -> Self {
- Cargo {
- id: normalize_rust_import(id.split("/").last().unwrap_or_default()),
- deps: RefCell::new(HashMap::new()),
- bins: RefCell::new(HashMap::new()),
- }
- }
-
- pub fn dep(&mut self, deps: &String) {
- let lines = deps.split("\n").collect::>();
- for line in lines {
- if !line.contains("=") {
- continue;
- }
- let (a, b) = line.split_once("=").unwrap();
- let name = a.trim();
- let mut version = b.trim();
- if let Some(i) = version.find("//") {
- version = &version[0..i];
- }
- self.deps
- .borrow_mut()
- .insert(name.to_string(), version.to_string());
- }
- }
-
- pub fn undep(&mut self, name: &String) {
- self.deps.borrow_mut().remove(name);
- }
-
- pub fn bin(&mut self, name: &String, path: &str) {
- self.bins
- .borrow_mut()
- .insert(name.clone(), path.to_string());
- }
-
- pub fn to_string(&mut self) -> String {
- let deps = self
- .deps
- .borrow()
- .iter()
- .filter(|(k, _)| !k.contains("crate"))
- .fold(String::new(), |acc, (k, v)| {
- format!("{}\n{} = {}", acc, k, v)
- });
- let bins = self.bins.borrow().iter().fold(String::new(), |acc, b| {
- format!(
- "{}\n[[bin]]\nname = \"{}\"\npath = \"src/{}/main.rs\"\n",
- acc, b.0, b.1
- )
- });
- format!(
- r#"[package]
-name = "{}"
-version = "0.1.0"
-edition = "2021"
-{}
-[dependencies]
-{}
-"#,
- self.id, bins, deps,
- )
- }
-}
diff --git a/src/compress/types/mod.rs b/src/compress/types/mod.rs
deleted file mode 100644
index ff1ce1c3..00000000
--- a/src/compress/types/mod.rs
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-mod test;
-pub mod types;
diff --git a/src/compress/types/test.rs b/src/compress/types/test.rs
deleted file mode 100644
index a4c7d206..00000000
--- a/src/compress/types/test.rs
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::{cell::RefCell, collections::HashMap, rc::Rc};
-
-fn ensure_node_in_map<'a>(id: i32, map: &'a mut HashMap) -> &'a mut i32 {
- map.entry(id).or_insert(0)
-}
-
-fn test_multi_mut(data: HashMap>) {
- let graph = Rc::new(RefCell::new(HashMap::new()));
-
- for (k, _mod) in data {
- // First, get the value of `k` from the graph
- let mut inode_value = {
- let mut graph_ref = graph.borrow_mut();
- let inode = ensure_node_in_map(k, &mut graph_ref);
- *inode
- };
-
- for id in _mod {
- // Update the value of `id` in the graph with the value of `k`
- let mut graph_ref = graph.borrow_mut();
- let v = ensure_node_in_map(id, &mut graph_ref);
- *v = inode_value;
- inode_value += 1;
- }
-
- // update to k
- {
- let mut graph_ref = graph.borrow_mut();
- let inode = ensure_node_in_map(k, &mut graph_ref);
- *inode = inode_value;
- }
- }
-}
-
-#[test]
-fn test_main() {
- let data = vec![(1, vec![2, 3]), (2, vec![3, 4])];
- let mut map = HashMap::new();
- for (k, v) in data {
- let mut vec = Vec::new();
- for i in v {
- vec.push(i);
- }
- map.insert(k, vec);
- }
- test_multi_mut(map);
-}
diff --git a/src/compress/types/types.rs b/src/compress/types/types.rs
deleted file mode 100644
index a768b5de..00000000
--- a/src/compress/types/types.rs
+++ /dev/null
@@ -1,977 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use serde_json::{json, Value};
-use std::{
- cell::RefCell,
- collections::{HashMap, HashSet},
- hash::{Hash, Hasher},
- rc::Rc,
-};
-
-use crate::storage::cache::get_cache;
-use serde::{Deserialize, Serialize};
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Repository {
- #[serde(default)]
- pub id: String,
- #[serde(rename = "Modules")]
- pub modules: HashMap,
-
- // constructed by ABCoder
- #[serde(rename = "Graph")]
- pub graph: Option>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, Default)]
-pub struct Node {
- #[serde(rename = "Type")]
- pub r#type: NodeType,
- #[serde(rename = "ModPath")]
- pub mod_path: String,
- #[serde(rename = "PkgPath")]
- pub(crate) pkg_path: String,
- #[serde(rename = "Name")]
- pub(crate) name: String,
- #[serde(rename = "Dependencies")]
- pub dependencies: Option>,
- #[serde(rename = "References")]
- pub references: Option>,
-}
-
-impl Node {
- pub fn id(&self) -> Identity {
- Identity {
- mod_path: self.mod_path.clone(),
- pkg_path: self.pkg_path.clone(),
- name: self.name.clone(),
- }
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub enum RelationKind {
- Dependency,
- Reference,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Relation {
- #[serde(rename = "ModPath")]
- pub mod_path: String,
- #[serde(rename = "PkgPath")]
- pub(crate) pkg_path: String,
- #[serde(rename = "Name")]
- pub(crate) name: String,
- #[serde(rename = "Kind")]
- pub(crate) kind: RelationKind,
- #[serde(rename = "Desc")]
- pub(crate) desc: Option,
- #[serde(rename = "Codes")]
- pub(crate) codes: Option,
-}
-
-impl Relation {
- pub fn id(&self) -> Identity {
- Identity {
- mod_path: self.mod_path.clone(),
- pkg_path: self.pkg_path.clone(),
- name: self.name.clone(),
- }
- }
-}
-
-#[derive(Debug, Clone, Default)]
-pub enum NodeType {
- #[default]
- Unknown,
- Func,
- Type,
- Var,
-}
-
-impl NodeType {
- pub fn to_string(&self) -> String {
- match self {
- NodeType::Func => "FUNC".to_string(),
- NodeType::Type => "TYPE".to_string(),
- NodeType::Var => "VAR".to_string(),
- _ => "UNKNOWN".to_string(),
- }
- }
-}
-
-impl Serialize for NodeType {
- fn serialize(&self, serializer: S) -> Result
- where
- S: serde::Serializer,
- {
- serializer.serialize_str(&self.to_string())
- }
-}
-
-impl<'de> Deserialize<'de> for NodeType {
- fn deserialize(deserializer: D) -> Result
- where
- D: serde::Deserializer<'de>,
- {
- let s = String::deserialize(deserializer)?;
- match s.as_str() {
- "FUNC" => Ok(NodeType::Func),
- "TYPE" => Ok(NodeType::Type),
- "VAR" => Ok(NodeType::Var),
- _ => Ok(NodeType::Unknown),
- }
- }
-}
-
-impl Repository {
- pub fn is_external_mod(&self, mod_path: &str) -> bool {
- return mod_path.contains("@") || mod_path == "std" || mod_path == "";
- }
-
- pub fn get_id_content(&self, id: &Identity) -> Option {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- if let Some(f) = pkg.functions.get(&id.name) {
- return Some(f.content.clone());
- } else if let Some(t) = pkg.types.get(&id.name) {
- return Some(t.content.clone());
- } else if let Some(v) = pkg.vars.get(&id.name) {
- return Some(v.content.clone());
- }
- }
- }
- None
- }
-
- pub fn get_id_content_ref(&self, id: &Identity) -> Option<&String> {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- if let Some(f) = pkg.functions.get(&id.name) {
- return Some(&f.content);
- } else if let Some(t) = pkg.types.get(&id.name) {
- return Some(&t.content);
- } else if let Some(v) = pkg.vars.get(&id.name) {
- return Some(&v.content);
- }
- }
- }
- None
- }
-
- pub fn get_pkg(&self, id: &Identity) -> Option<(&Module, &Package)> {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- return Some((m, pkg));
- }
- }
- None
- }
-
- pub fn inside_main_pkg(&self, m: &str, pkg: &str) -> Option<&String> {
- if let Some(m) = self.modules.get(m) {
- for (name, p) in m.packages.iter() {
- if p.is_main && pkg.starts_with(name) {
- return Some(&p.id);
- }
- }
- }
- None
- }
-
- pub fn remove_id(&mut self, id: &Identity) {
- for (_, _mod) in self.modules.iter_mut() {
- for (_, pkg) in _mod.packages.iter_mut() {
- if let Some(_) = pkg.functions.remove(&id.name) {
- continue;
- } else if let Some(_) = pkg.types.remove(&id.name) {
- continue;
- } else if let Some(_) = pkg.vars.remove(&id.name) {
- continue;
- }
- }
- }
- }
-
- pub fn remove_unreferenced(&mut self, reffered: &HashSet<&Identity>) {
- // filter all external (or "kitex_gen" or "hertz_gen") and not referred nodes in repo
- for (_, v) in self.graph.clone().unwrap().iter() {
- if (!v.id().inside()
- || v.pkg_path.contains("kitex_gen")
- || v.pkg_path.contains("hertz_gen"))
- && !reffered.contains(&v.id())
- {
- // remove id in repo
- self.remove_id(&v.id());
- }
- }
- }
-
- pub fn contains(&self, id: &Identity) -> bool {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(p) = m.packages.get(&id.pkg_path) {
- if let Some(_) = p.functions.get(&id.name) {
- return true;
- } else if let Some(_) = p.types.get(&id.name) {
- return true;
- }
- }
- }
- false
- }
-
- pub fn merge_with(&mut self, other: &Repository) {
- for (mod_name, _mod) in other.modules.iter() {
- if let Some(smod) = self.modules.get_mut(mod_name) {
- for (pkg_name, pkg) in _mod.packages.iter() {
- if let Some(p) = smod.packages.get_mut(pkg_name) {
- p.merge_with(pkg);
- } else {
- smod.packages.insert(pkg_name.clone(), pkg.clone());
- }
- smod.name = _mod.name.clone();
- smod.dir = _mod.dir.clone();
- smod.dependencies = _mod.dependencies.clone();
- }
- } else {
- self.modules.insert(mod_name.clone(), _mod.clone());
- }
- }
- self.graph = other.graph.clone();
- }
-
- pub fn save_to_cache(&self) {
- let repo = serde_json::to_string(&self).expect("marshal struct error");
- get_cache()
- .put(self.id.as_ref(), Vec::from(repo))
- .expect("save to cache failed");
- }
-
- pub fn get_func(&self, id: &Identity) -> Option<&Function> {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- if let Some(f) = pkg.functions.get(&id.name) {
- return Some(f);
- }
- }
- }
- None
- }
-
- pub fn get_type(&self, id: &Identity) -> Option<&Struct> {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- if let Some(f) = pkg.types.get(&id.name) {
- return Some(f);
- }
- }
- }
- None
- }
-
- pub fn get_kind(&self, id: &Identity) -> NodeType {
- if let Some(func) = self.get_func(id) {
- return NodeType::Func;
- } else if let Some(t) = self.get_type(id) {
- return NodeType::Type;
- } else if let Some(v) = self.get_var(id) {
- return NodeType::Var;
- } else if let Some(code) = self.get_id_content(id) {
- NodeType::Unknown
- } else {
- return NodeType::Unknown;
- }
- }
-
- pub fn get_file_line<'a>(&'a self, id: &'a Identity) -> FileLine {
- if let Some(func) = self.get_func(id) {
- return FileLine {
- pkg: &id.pkg_path,
- file: &func.file,
- line: func.line,
- };
- } else if let Some(t) = self.get_type(id) {
- return FileLine {
- pkg: &id.pkg_path,
- file: &t.file,
- line: t.line,
- };
- } else if let Some(v) = self.get_var(id) {
- return FileLine {
- pkg: &id.pkg_path,
- file: &v.file,
- line: v.line,
- };
- } else {
- FileLine {
- pkg: "",
- file: "",
- line: 0,
- }
- }
- }
-
- pub fn is_exported(&self, id: &Identity) -> bool {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- if let Some(f) = pkg.functions.get(&id.name) {
- return f.is_exported;
- } else if let Some(t) = pkg.types.get(&id.name) {
- return t.is_exported;
- } else if let Some(v) = pkg.vars.get(&id.name) {
- return v.is_exported;
- }
- }
- }
- false
- }
-
- pub fn get_var(&self, id: &Identity) -> Option<&Variant> {
- if let Some(m) = self.modules.get(&id.mod_path) {
- if let Some(pkg) = m.packages.get(&id.pkg_path) {
- if let Some(f) = pkg.vars.get(&id.name) {
- return Some(f);
- }
- }
- }
- None
- }
-
- pub fn get_node(&self, id: &Identity) -> Option<&Node> {
- if let Some(graph) = &self.graph {
- return graph.get(&String::from(id));
- }
- None
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Module {
- #[serde(rename = "Name")]
- pub name: String,
- #[serde(rename = "Dir")]
- pub dir: String,
- #[serde(rename = "Dependencies")]
- pub dependencies: Option>,
- #[serde(rename = "Packages")]
- pub packages: HashMap,
- #[serde(rename = "Files")]
- pub files: Option>,
- #[serde(rename = "Language", default)]
- pub language: String,
- #[serde(rename = "compress_data")]
- pub compress_data: Option,
-}
-
-impl Module {
- pub fn to_compress(&self) -> ToCompressModule {
- let mut packages = Vec::new();
- for (_, p) in self.packages.iter() {
- packages.push(Description {
- name: &p.id,
- description: p.compress_data.as_ref().unwrap(),
- });
- }
-
- ToCompressModule {
- name: &self.name,
- dir: &self.dir,
- packages: Some(packages),
- }
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct File {
- #[serde(rename = "Name")]
- pub name: String,
- #[serde(rename = "Path")]
- pub path: String,
- #[serde(rename = "Imports")]
- pub imports: Option>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Import {
- #[serde(rename = "Alias")]
- pub alias: Option,
- #[serde(rename = "Path")]
- pub path: String,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Package {
- #[serde(rename = "PkgPath", default)]
- pub id: String,
- #[serde(rename = "IsMain", default)]
- pub is_main: bool,
- #[serde(rename = "Dependencies", default)]
- pub dependencies: Vec,
- #[serde(rename = "Functions")]
- pub functions: HashMap,
- #[serde(rename = "Types")]
- pub types: HashMap,
- #[serde(rename = "Vars")]
- pub vars: HashMap,
- #[serde(rename = "compress_data")]
- pub compress_data: Option,
-}
-
-fn format_file(file: &str) -> String {
- // count non-alphanumeric characters
- let mut i = 0;
- for c in file.chars() {
- if !c.is_alphanumeric() {
- i += 1;
- }
- }
- if i >= 2 {
- // replace non-alphanumeric characters with '_' before last
- file.replace(
- |c: char| -> bool {
- if !c.is_alphanumeric() && i > 1 {
- i -= 1;
- return true;
- }
- false
- },
- "_",
- )
- } else {
- file.to_string()
- }
-}
-
-#[test]
-fn test_format_file() {
- assert_eq!(format_file("a/b/c.go"), "a_b_c.go");
- assert_eq!(format_file("x.pb.go"), "x_pb.go");
-}
-
-impl Package {
- pub fn to_files(&self) -> HashMap> {
- let mut files: HashMap> = HashMap::new();
- for (_, f) in self.functions.iter() {
- let file = format_file(&f.file);
- if let Some(nodes) = files.get_mut(&file) {
- nodes.push(f.id());
- } else {
- files.insert(file, vec![f.id()]);
- }
- }
- for (_, t) in self.types.iter() {
- let file = format_file(&t.file);
- if let Some(nodes) = files.get_mut(&file) {
- nodes.push(t.id());
- } else {
- files.insert(file, vec![t.id()]);
- }
- }
- for (_, v) in self.vars.iter() {
- let file = format_file(&v.file);
- if let Some(nodes) = files.get_mut(&file) {
- nodes.push(v.id());
- } else {
- files.insert(file, vec![v.id()]);
- }
- }
- files
- }
-
- pub fn merge_with(&mut self, other: &Package) {
- self.id = other.id.clone();
- for (name, f) in other.functions.iter() {
- if let Some(func) = self.functions.get_mut(name) {
- func.merge_with(f);
- } else {
- self.functions.insert(name.clone(), f.clone());
- }
- }
- for (name, t) in other.types.iter() {
- if let Some(typ) = self.types.get_mut(name) {
- typ.merge_with(t);
- } else {
- self.types.insert(name.clone(), t.clone());
- }
- }
- for (name, v) in other.vars.iter() {
- if let Some(var) = self.vars.get_mut(name) {
- var.merge_with(v);
- } else {
- self.vars.insert(name.clone(), v.clone());
- }
- }
- }
-
- // output a json string of exported functions and types, schema:
- // {"Functions":[{"Name":"", "Description":"", "Signature":""}], "Types":[{"Name":"", "Description":"", "Signature":""}]}
- pub fn to_compress(&self) -> ToCompressPkg {
- // functions
- let mut funcs = Vec::new();
- for (name, f) in self.functions.iter() {
- // skip non-exported functions
- if !f.is_exported {
- continue;
- }
- if let Some(d) = &f.compress_data {
- funcs.push(Description {
- name: name,
- description: d,
- });
- }
- }
- // types
- let mut types = Vec::new();
- for (name, t) in self.types.iter() {
- // skip non-exported types
- if !t.is_exported {
- continue;
- }
- if let Some(d) = &t.compress_data {
- types.push(Description {
- name: name,
- description: d,
- });
- }
- }
- // vars
- let mut vars = Vec::new();
- for (name, t) in self.vars.iter() {
- // skip non-exported types
- if !t.is_exported {
- continue;
- }
- if let Some(d) = &t.compress_data {
- vars.push(Description {
- name: name,
- description: d,
- });
- }
- }
- return ToCompressPkg {
- path: &self.id,
- funcs: Some(funcs),
- types: Some(types),
- vars: Some(vars),
- };
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, Default)]
-pub struct Variant {
- #[serde(rename = "ModPath")]
- pub mod_path: String,
- #[serde(rename = "PkgPath")]
- pub pkg_path: String,
- #[serde(rename = "Name")]
- pub name: String,
- #[serde(rename = "File")]
- pub file: String,
- #[serde(rename = "Line")]
- pub line: u32,
- #[serde(rename = "IsExported", default)]
- pub is_exported: bool,
- #[serde(rename = "IsConst", default)]
- pub is_const: bool,
- #[serde(rename = "Type")]
- pub type_id: Option,
- #[serde(rename = "IsPointer", default)]
- pub is_pointer: bool,
- #[serde(rename = "Content")]
- pub content: String,
-
- // compress_data
- #[serde(rename = "compress_data")]
- pub compress_data: Option,
-}
-
-impl Variant {
- pub fn id(&self) -> Identity {
- Identity {
- mod_path: self.mod_path.clone(),
- pkg_path: self.pkg_path.clone(),
- name: self.name.clone(),
- }
- }
-
- pub fn merge_with(&mut self, other: &Variant) {
- let compress = self.compress_data.clone();
- *self = other.clone();
- self.compress_data = compress;
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Function {
- #[serde(rename = "ModPath")]
- pub mod_path: String,
- #[serde(rename = "PkgPath")]
- pub pkg_path: String,
- #[serde(rename = "Name")]
- pub name: String,
- #[serde(rename = "File")]
- pub file: String,
- #[serde(rename = "Line")]
- pub line: u32,
- #[serde(rename = "Exported", default)]
- pub is_exported: bool,
- #[serde(rename = "IsMethod", default)]
- is_method: bool,
- #[serde(rename = "IsInterfaceMethod", default)]
- is_interface_method: bool,
- #[serde(rename = "Content")]
- pub content: String,
- #[serde(rename = "Receiver")]
- pub receiver: Option,
- #[serde(rename = "Params")]
- pub params: Option>,
- #[serde(rename = "Results")]
- pub results: Option>,
- #[serde(rename = "FunctionCalls")]
- pub function_calls: Option>,
- #[serde(rename = "MethodCalls")]
- pub method_calls: Option>,
- #[serde(rename = "Types")]
- pub types: Option>,
- #[serde(rename = "GlobalVars")]
- pub global_vars: Option>,
-
- // compress_data
- #[serde(rename = "compress_data")]
- pub compress_data: Option,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Receiver {
- #[serde(rename = "IsPointer")]
- pub is_pointer: bool,
- #[serde(rename = "Type")]
- pub type_id: Identity,
-}
-
-impl Function {
- pub fn id(&self) -> Identity {
- Identity {
- mod_path: self.mod_path.clone(),
- pkg_path: self.pkg_path.clone(),
- name: self.name.clone(),
- }
- }
-
- pub fn merge_with(&mut self, other: &Function) {
- let compress = self.compress_data.clone();
- *self = other.clone();
- self.compress_data = compress;
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Struct {
- #[serde(rename = "ModPath")]
- pub mod_path: String,
- #[serde(rename = "PkgPath")]
- pub(crate) pkg_path: String,
- #[serde(rename = "Name")]
- pub(crate) name: String,
- #[serde(rename = "File")]
- pub file: String,
- #[serde(rename = "Line")]
- pub line: u32,
- #[serde(rename = "Exported", default)]
- pub is_exported: bool,
- #[serde(rename = "TypeKind")]
- pub type_kind: String,
- #[serde(rename = "Content")]
- pub(crate) content: String,
- #[serde(rename = "SubStruct")]
- pub(crate) sub_struct: Option>,
- #[serde(rename = "InlineStruct")]
- pub(crate) inline_struct: Option>,
- #[serde(rename = "Methods")]
- pub(crate) methods: Option>,
-
- // compress_data
- #[serde(rename = "compress_data")]
- pub compress_data: Option,
-}
-
-impl Struct {
- pub fn id(&self) -> Identity {
- Identity {
- mod_path: self.mod_path.clone(),
- pkg_path: self.pkg_path.clone(),
- name: self.name.clone(),
- }
- }
- pub fn merge_with(&mut self, other: &Struct) {
- let compress = self.compress_data.clone();
- *self = other.clone();
- self.compress_data = compress;
- }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, Eq, Default)]
-pub struct Identity {
- #[serde(rename = "ModPath")]
- pub mod_path: String,
- #[serde(rename = "PkgPath")]
- pub pkg_path: String,
- #[serde(rename = "Name")]
- pub name: String,
- // #[serde(rename = "Extra", skip_serializing_if = "Option::is_none")]
- // pub extra: Option>,
-}
-
-impl Identity {
- pub fn inside(&self) -> bool {
- self.mod_path != "" && !self.mod_path.contains("@")
- }
-
- pub fn to_string(&self) -> String {
- format!("{}?{}#{}", self.mod_path, self.pkg_path, self.name,)
- }
-}
-
-impl From<&Identity> for String {
- fn from(id: &Identity) -> Self {
- format!("{}?{}#{}", id.mod_path, id.pkg_path, id.name,)
- }
-}
-
-impl From<&String> for Identity {
- fn from(s: &String) -> Self {
- let mut parts = s.split('?');
- let mod_path = parts.next().unwrap();
- let mut parts = parts.next().unwrap().split('#');
- let pkg_path = parts.next().unwrap();
- let name = parts.next().unwrap();
- Identity {
- mod_path: mod_path.to_string(),
- pkg_path: pkg_path.to_string(),
- name: name.to_string(),
- }
- }
-}
-
-impl Hash for Identity {
- fn hash(&self, state: &mut H) {
- self.mod_path.hash(state);
- self.pkg_path.hash(state);
- self.name.hash(state);
- }
-}
-
-impl PartialEq for Identity {
- fn eq(&self, other: &Self) -> bool {
- self.mod_path == other.mod_path
- && self.pkg_path == other.pkg_path
- && self.name == other.name
- }
-}
-
-#[derive(PartialEq, Eq, PartialOrd, Ord)]
-pub struct FileLine<'a> {
- pub pkg: &'a str,
- pub file: &'a str,
- pub line: u32,
-}
-
-#[derive(Serialize, Debug)]
-pub(crate) struct ToCompressPkg<'a> {
- #[serde(rename = "PkgPath")]
- pub(crate) path: &'a str,
- #[serde(rename = "Functions")]
- pub(crate) funcs: Option>>,
- #[serde(rename = "Types")]
- pub(crate) types: Option>>,
- #[serde(rename = "Variables")]
- pub(crate) vars: Option>>,
-}
-
-#[derive(Serialize, Debug)]
-pub(crate) struct ToCompressModule<'a> {
- #[serde(rename = "Name")]
- pub(crate) name: &'a str,
- #[serde(rename = "Dir")]
- pub(crate) dir: &'a str,
- #[serde(rename = "Packages")]
- pub(crate) packages: Option>>,
-}
-
-#[derive(Serialize, Debug)]
-pub(crate) struct Description<'a> {
- pub name: &'a str,
- pub description: &'a str,
-}
-
-#[derive(Serialize, Debug)]
-pub(crate) struct ToCompressVar<'a> {
- #[serde(rename = "Content")]
- pub(crate) content: &'a String,
- #[serde(rename = "Type", skip_serializing_if = "Option::is_none")]
- pub(crate) r#type: &'a Option,
- #[serde(rename = "References")]
- pub(crate) refers: Vec,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub(crate) struct ToCompressFunc {
- #[serde(rename = "Content")]
- pub(crate) content: String,
- #[serde(rename = "Receiver", skip_serializing_if = "Option::is_none")]
- pub(crate) receiver: Option,
- #[serde(rename = "Params", skip_serializing_if = "Option::is_none")]
- pub(crate) params: Option>,
- #[serde(rename = "Results", skip_serializing_if = "Option::is_none")]
- pub(crate) results: Option>,
- #[serde(rename = "Related_func")]
- pub(crate) related_func: Option>,
- #[serde(rename = "Related_type")]
- pub(crate) related_type: Option>,
- #[serde(rename = "Related_var")]
- pub(crate) related_var: Option>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub(crate) struct ToCompressType {
- #[serde(rename = "Content")]
- pub(crate) content: String,
- #[serde(rename = "Related_methods")]
- pub(crate) related_methods: Option>,
- #[serde(rename = "Related_types")]
- pub(crate) related_types: Option>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub(crate) struct CalledType {
- #[serde(rename = "CallName")]
- pub call_name: String,
- #[serde(rename = "Description")]
- pub description: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub(crate) struct KeyValueType {
- #[serde(rename = "Name")]
- pub name: String,
- #[serde(rename = "Description")]
- pub description: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub(crate) struct ToConvert {
- #[serde(rename = "Name")]
- pub(crate) name: String,
- #[serde(rename = "Receiver", skip_serializing_if = "Option::is_none")]
- pub(crate) receiver: Option,
- #[serde(rename = "Definition")]
- pub(crate) definition: String,
- #[serde(rename = "Dependencies")]
- pub(crate) dependencies: HashMap,
- #[serde(rename = "References")]
- pub(crate) references: HashMap,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub(crate) struct Reference {
- // #[serde(rename = "NeedMock")]
- // pub(crate) need_mock: bool,
- #[serde(rename = "Name", skip_serializing_if = "Option::is_none")]
- pub(crate) name: Option,
- #[serde(rename = "Code")]
- pub(crate) code: String,
- #[serde(rename = "ImportPath", skip_serializing_if = "Option::is_none")]
- pub(crate) import: Option,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct ToMerge {
- #[serde(rename = "ID")]
- pub(crate) id: String,
- #[serde(rename = "Code")]
- pub(crate) code: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct ToValidate {
- #[serde(rename = "Code")]
- pub(crate) code: String,
- #[serde(rename = "Error")]
- pub(crate) error: String,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Code {
- #[serde(rename = "Code")]
- pub code: String,
- #[serde(rename = "Imports", skip_serializing_if = "Option::is_none")]
- pub imports: Option>,
- #[serde(rename = "Crates", skip_serializing_if = "Option::is_none")]
- pub crates: Option,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct CodeCache {
- pub id: String,
- pub nodes: HashMap,
- #[serde(default)]
- pub files: HashMap,
-}
-
-impl CodeCache {
- pub fn new(id: String) -> Self {
- CodeCache {
- nodes: HashMap::new(),
- id: id,
- files: HashMap::new(),
- }
- }
-
- pub fn get(&self, id: &str) -> Option<&Code> {
- self.nodes.get(id)
- }
-
- pub fn get_by_id(&self, id: &Identity) -> Option<&Code> {
- self.nodes.get(&String::from(id))
- }
-
- pub fn insert(&mut self, id: &str, code: Code) {
- self.nodes.insert(id.to_string(), code);
- }
-
- pub fn insert_by_id(&mut self, id: &Identity, code: Code) {
- self.nodes.insert(String::from(id), code);
- }
-
- pub fn save_to_cache(&self) {
- let repo = serde_json::to_string(&self).expect("marshal struct error");
- get_cache()
- .put(&self.id, Vec::from(repo))
- .expect("save to cache failed");
- }
-
- pub fn load_from_cache(&mut self) -> bool {
- if let Some(repo) = get_cache().get(&self.id) {
- if let Ok(repo) = String::from_utf8(repo) {
- if let Ok(repo) = serde_json::from_str(&repo) {
- *self = repo;
- return true;
- }
- }
- }
- false
- }
-}
diff --git a/src/config/mod.rs b/src/config/mod.rs
deleted file mode 100644
index e1a64095..00000000
--- a/src/config/mod.rs
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-
-// limitations under the License.
-use std::path::{Path, PathBuf};
-
-use lazy_static::lazy_static;
-use serde::Deserialize;
-
-use crate::parse;
-
-#[derive(Debug)]
-pub enum Language {
- English,
- Chinese,
-}
-
-#[derive(Debug)]
-pub struct Config {
- pub work_dir: String,
- pub parser_dir: String,
- pub api_type: String,
- pub maas_model_name: String,
- pub mass_http_url: String,
-
- pub coze_api_token: Option,
- pub coze_bot_id: Option,
-
- pub ollama_model: Option,
-
- pub language: Language,
- pub exclude_dirs: Vec,
-}
-
-fn default_work_dir() -> String {
- "tmp_abcoder".to_string()
-}
-
-fn default_parser_dir() -> String {
- "tools/parser".to_string()
-}
-
-fn default_api_type() -> String {
- "maas".to_string()
-}
-
-fn default_maas_model_name() -> String {
- "".to_string()
-}
-
-impl Config {
- pub fn new() -> Self {
- Self {
- work_dir: default_work_dir(),
- parser_dir: default_parser_dir(),
- api_type: default_api_type(),
- maas_model_name: default_maas_model_name(),
- mass_http_url: "".to_string(),
- coze_api_token: None,
- coze_bot_id: None,
- ollama_model: None,
- exclude_dirs: vec![],
- language: Language::Chinese,
- }
- }
-
- pub fn parse_from_env() -> Self {
- let mut s = Self {
- work_dir: std::env::var("WORK_DIR").unwrap_or_else(|_| default_work_dir()),
- parser_dir: std::env::var("PARSER_DIR").unwrap_or_else(|_| default_parser_dir()),
- api_type: std::env::var("API_TYPE").unwrap_or_else(|_| default_api_type()),
- maas_model_name: std::env::var("MAAS_MODEL_NAME")
- .unwrap_or_else(|_| default_maas_model_name()),
- mass_http_url: std::env::var("MASS_HTTP_URL").unwrap_or_else(|_| "".to_string()),
- coze_api_token: std::env::var("COZE_API_TOKEN").ok(),
- coze_bot_id: std::env::var("COZE_BOT_ID").ok(),
- ollama_model: std::env::var("OLLAMA_MODEL").ok(),
- exclude_dirs: std::env::var("EXCLUDE_DIRS")
- .map(|v| v.split(',').map(|s| s.to_string()).collect())
- .unwrap_or_else(|_| vec![]),
- language: std::env::var("LANGUAGE")
- .map(|v| match v.as_str() {
- "en" => Language::English,
- "zh" => Language::Chinese,
- _ => Language::Chinese,
- })
- .unwrap_or(Language::Chinese),
- };
- // if work_dir is not absolute path, make it absolute path
- if !Path::new(&s.work_dir).is_absolute() {
- s.work_dir = PathBuf::from(std::env::current_dir().unwrap())
- .join(s.work_dir)
- .to_str()
- .unwrap()
- .to_string();
- }
- s
- }
-}
-
-lazy_static! {
- pub static ref CONFIG: Config = {
- dotenv::dotenv().ok();
- Config::parse_from_env()
- };
-}
-
-pub fn parser_path() -> String {
- Path::new(&CONFIG.parser_dir)
- .join("lang")
- .to_str()
- .unwrap()
- .to_string()
-}
-
-pub enum ProgramLanguage {
- Rust,
- Go,
- Unknown(String),
-}
-
-impl ProgramLanguage {
- pub fn to_string(&self) -> String {
- match self {
- ProgramLanguage::Rust => "rust".to_string(),
- ProgramLanguage::Go => "go".to_string(),
- ProgramLanguage::Unknown(s) => s.to_string(),
- }
- }
-}
-
-fn decide_language(path: &str) -> ProgramLanguage {
- // scan root directory
- walkdir::WalkDir::new(path)
- .max_depth(2)
- .into_iter()
- .filter_map(|entry| {
- let binding = entry.unwrap();
- let path = binding.path();
- if !path.is_dir() {
- let name = path.file_name().unwrap().to_str().unwrap();
- if name == "Cargo.toml" {
- return Some(ProgramLanguage::Rust);
- }
- if name == "go.mod" {
- return Some(ProgramLanguage::Go);
- }
- }
- None
- })
- .next()
- .unwrap_or(ProgramLanguage::Unknown(path.to_string()))
-}
-
-pub fn parser_and_args<'a>(
- repo_path: &'a str,
- opts: &parse::CompressOptions,
-) -> (String, Vec) {
- let lang = decide_language(repo_path);
- let path = parser_path();
- println!("parser path: {:?}", path);
- let mut args = vec![
- "collect".to_string(),
- lang.to_string(),
- repo_path.to_string(),
- ];
- for exclude in &CONFIG.exclude_dirs {
- args.push(format!("--exclude={exclude}"));
- }
- if !opts.not_load_external_symbol {
- args.push("--load-external-symbol".to_string());
- }
- if opts.no_need_comment {
- args.push("--no-need-comment".to_string());
- }
- (path, args)
-}
diff --git a/src/export.rs b/src/export.rs
deleted file mode 100644
index f2569150..00000000
--- a/src/export.rs
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-use std::{
- fs::{self, File},
- io::Write,
- path::{Path, PathBuf},
-};
-
-use csv::Writer;
-
-use crate::{
- compress::{compress, types::types::Repository},
- config::{self, CONFIG},
- storage::cache,
- utils::{cmd, errors::Error, git, split},
-};
-
-#[derive(Clone, Debug, Default)]
-pub struct ExportOptions {
- pub csv: bool,
- pub public_only: bool,
- pub output: Option,
-}
-
-fn to_csv_summary(repo: &Repository) -> String {
- let mut w = Writer::from_writer(Vec::new());
- // add header
- w.write_record(&["Package", "Name", "Kind", "Signature", "Summary"])
- .unwrap();
-
- for (mod_name, _mod) in repo.modules.iter() {
- for (pname, pkg) in _mod.packages.iter() {
- for (name, f) in pkg.functions.iter() {
- let tmp = &"".to_string();
- // split content, 1024B for each
- let sums = split::split_text(f.compress_data.as_ref().unwrap_or(tmp), 924);
- for sum in sums {
- w.write_record(&[
- pname,
- name,
- "Function",
- f.content.split_once('\n').unwrap_or((&f.content, "")).0,
- &format!("{}: {}", name, sum),
- ])
- .unwrap();
- }
- }
- for (name, t) in pkg.types.iter() {
- let tmp = &"".to_string();
- // split content, 1024B for each
- let sums = split::split_text(t.compress_data.as_ref().unwrap_or(tmp), 924);
- for sum in sums {
- w.write_record(&[
- pname,
- name,
- "Type",
- t.content.split_once('\n').unwrap_or((&t.content, "")).0,
- &format!("{}: {}", name, sum),
- ])
- .unwrap();
- }
- }
- for (name, v) in pkg.vars.iter() {
- let tmp = &"".to_string();
- // split content, 1024B for each
- let sums = split::split_text(v.compress_data.as_ref().unwrap_or(tmp), 924);
- for sum in sums {
- w.write_record(&[
- pname,
- name,
- "Var",
- v.content.split_once('\n').unwrap_or((&v.content, "")).0,
- &format!("{}: {}", name, sum),
- ])
- .unwrap();
- }
- }
- }
- }
- w.flush().unwrap();
- String::from_utf8(w.into_inner().unwrap()).unwrap()
-}
-
-pub fn to_csv_decl(repo: &Repository) -> String {
- let mut w = Writer::from_writer(Vec::new());
- // add header
- w.write_record(&["Identity", "Kind", "Definition"]).unwrap();
-
- for (mod_name, _mod) in repo.modules.iter() {
- for (pname, pkg) in _mod.packages.iter() {
- for (name, f) in pkg.functions.iter() {
- let decl = f.content.as_str();
- // split content, 1024B for each
- let mut start = 0;
- let mut end = 1024;
- while start < decl.len() {
- if end > decl.len() {
- end = decl.len();
- }
- if start >= 1024 {
- start -= 100;
- }
- w.write_record(&[
- &format!("{}.{}", pname, name),
- "Function",
- &decl[start..end],
- ])
- .unwrap();
- start = end;
- end += 924;
- }
- }
- for (name, t) in pkg.types.iter() {
- let decl = t.content.as_str();
- // split content, 1024B for each
- let mut start = 0;
- let mut end = 1024;
- while start < decl.len() {
- if end > decl.len() {
- end = decl.len();
- }
- if start >= 1024 {
- start -= 100;
- }
- w.write_record(&[&format!("{}.{}", pname, name), "Type", &decl[start..end]])
- .unwrap();
- start = end;
- end += 924;
- }
- }
- for (name, v) in pkg.vars.iter() {
- let decl = v.content.as_str();
- // split content, 1024B for each
- let mut start = 0;
- let mut end = 1024;
- while start < decl.len() {
- if end > decl.len() {
- end = decl.len();
- }
- if start >= 1024 {
- start -= 100;
- }
- w.write_record(&[&format!("{}.{}", pname, name), "Var", &decl[start..end]])
- .unwrap();
- start = end;
- end += 924;
- }
- }
- }
- }
- w.flush().unwrap();
- String::from_utf8(w.into_inner().unwrap()).unwrap()
-}
-
-pub fn to_csv_pkgs(repo: &Repository) -> String {
- let mut w = Writer::from_writer(Vec::new());
- // add header
- w.write_record(&["Name", "Summary"]).unwrap();
-
- for (mod_name, _mod) in repo.modules.iter() {
- for (pname, pkg) in _mod.packages.iter() {
- // split comress_data into chunks
- let empty = &"".to_string();
- let sums = split::split_text(pkg.compress_data.as_ref().unwrap_or(empty), 924);
- for sum in sums {
- w.write_record(&[&format!("{}", pname), &format!("{}: {}", pname, sum)])
- .unwrap();
- }
- }
- }
- w.flush().unwrap();
- String::from_utf8(w.into_inner().unwrap()).unwrap()
-}
-
-pub fn to_markdown(repo: &Repository, opts: &ExportOptions) -> String {
- let mut md = String::new();
-
- for (mod_name, module) in repo.modules.iter() {
- if repo.is_external_mod(mod_name) {
- continue;
- }
-
- // 添加模块标题
- md.push_str(&format!("# {}\n\n", mod_name));
- let lang = &module.language;
-
- for (pkg_name, pkg) in module.packages.iter() {
- // 添加包标题
- md.push_str(&format!("## {}\n\n", pkg_name));
- if let Some(data) = &pkg.compress_data {
- md.push_str(&format!("{}\n\n", data));
- }
-
- // 添加函数
- for (func_name, func) in pkg.functions.iter() {
- if opts.public_only && !repo.is_exported(&func.id()) {
- continue;
- }
-
- md.push_str(&format!("### {}\n\n", func_name));
- if let Some(data) = &func.compress_data {
- md.push_str(&format!("{}\n\n", data));
- }
- md.push_str(&format!("- Position\n\n{}:{}\n\n", func.file, func.line));
- md.push_str(&format!(
- "- Codes\n\n```{}\n{}\n```\n\n",
- lang, func.content
- ));
- }
-
- // 添加类型
- for (type_name, typ) in pkg.types.iter() {
- if opts.public_only && !repo.is_exported(&typ.id()) {
- continue;
- }
-
- md.push_str(&format!("### {}\n\n", type_name));
- if let Some(data) = &typ.compress_data {
- md.push_str(&format!("{}\n\n", data));
- }
- md.push_str(&format!("- Position\n\n{}:{}\n\n", typ.file, typ.line));
- md.push_str(&format!("- Codes\n\n```{}\n{}\n```\n\n", lang, typ.content));
- }
-
- // 添加变量
- for (var_name, var) in pkg.vars.iter() {
- if opts.public_only && !repo.is_exported(&var.id()) {
- continue;
- }
-
- md.push_str(&format!("### {}\n\n", var_name));
- if let Some(data) = &var.compress_data {
- md.push_str(&format!("{}\n\n", data));
- }
- md.push_str(&format!("- Position\n\n{}:{}\n\n", var.file, var.line));
- md.push_str(&format!("- Codes\n\n```{}\n{}\n```\n\n", lang, var.content));
- }
- }
- }
-
- md
-}
-
-// ... existing code ...
-
-pub fn export_repo(repo: &Repository, opts: &ExportOptions) {
- let dir = if let Some(path) = &opts.output {
- Path::new(path)
- } else {
- // pwd
- Path::new(&CONFIG.work_dir)
- };
-
- if opts.csv {
- // write summary to csv
- let csv_sum = to_csv_summary(repo);
- let path_sum = dir.join(repo.id.replace("/", "_").to_string() + "_summary.csv");
- let mut file = File::create(&path_sum).unwrap();
- file.write_all(csv_sum.as_bytes()).unwrap();
-
- // write summary to csv
- let csv_decl = to_csv_decl(repo);
- let path_decl = dir.join(repo.id.replace("/", "_").to_string() + "_decl.csv");
- let mut file = File::create(&path_decl).unwrap();
- file.write_all(csv_decl.as_bytes()).unwrap();
-
- // write package to csv
- let csv_pkg = to_csv_pkgs(repo);
- let path_pkg = dir.join(repo.id.replace("/", "_").to_string() + "_pkg.csv");
- let mut file = File::create(&path_pkg).unwrap();
- file.write_all(csv_pkg.as_bytes()).unwrap();
- } else {
- // write markdown
- let md = to_markdown(repo, opts);
- let path_md = dir.join(repo.id.replace("/", "_").to_string() + ".md");
- let mut file = File::create(&path_md).unwrap();
- file.write_all(md.as_bytes()).unwrap();
- }
-}
diff --git a/src/lang/go.go b/src/lang/go.go
deleted file mode 100644
index 08af9198..00000000
--- a/src/lang/go.go
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Copyright 2025 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package main
-
-import (
- "context"
-
- "github.com/cloudwego/abcoder/src/lang/collect"
- "github.com/cloudwego/abcoder/src/lang/golang/parser"
- "github.com/cloudwego/abcoder/src/lang/uniast"
-)
-
-func callGoParser(ctx context.Context, repoPath string, opts collect.CollectOption) (*uniast.Repository, error) {
- goopts := parser.Options{}
- if opts.LoadExternalSymbol {
- goopts.ReferCodeDepth = 1
- }
- if !opts.NoNeedComment {
- goopts.CollectComment = true
- }
- if opts.NeedTest {
- goopts.NeedTest = true
- }
- goopts.Excludes = opts.Excludes
- p := parser.NewParser(repoPath, repoPath, goopts)
- repo, err := p.ParseRepo()
- if err != nil {
- return nil, err
- }
- return &repo, nil
-}
diff --git a/src/lang/main.go b/src/lang/main.go
deleted file mode 100644
index 93b9286e..00000000
--- a/src/lang/main.go
+++ /dev/null
@@ -1,238 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-/**
- * Copyright 2024 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package main
-
-import (
- "context"
- "encoding/json"
- "os"
- "os/exec"
- "path/filepath"
- "time"
-
- "github.com/cloudwego/abcoder/src/lang/collect"
- "github.com/cloudwego/abcoder/src/lang/log"
- "github.com/cloudwego/abcoder/src/lang/lsp"
- "github.com/cloudwego/abcoder/src/lang/rust"
- "github.com/cloudwego/abcoder/src/lang/uniast"
- "github.com/spf13/cobra"
-)
-
-func main() {
- var client *lsp.LSPClient
- var repoPath string
- var lang lsp.Language
-
- // Define flags
- var flagLsp string
- var flagVerbose, flagDebug bool
- var excludes *[]string
- var opts collect.CollectOption
-
- var rootCmd = &cobra.Command{
- Use: `lang
-Action:
- collect Parse repo and export AST
-Language:
- rust For rust codes
- go For go codes`,
- Short: "Lang: An universal language analyzer based on Language-Server-Protocol",
- Args: cobra.ExactArgs(3),
-
- PreRun: func(cmd *cobra.Command, args []string) {
- // validate arguments
- checkVerbose(flagVerbose, flagDebug)
- var err error
- repoPath, err = filepath.Abs(args[2])
- if err != nil {
- log.Error("Failed to get absolute path of repository: %v\n", err)
- os.Exit(1)
- }
- l, lspPath := checkLSP(args[1], flagLsp)
- lang = l
- openfile, opentime := checkRepoPath(repoPath, l)
- if lang == lsp.Golang {
- return
- }
- // Initialize the LSP client
- log.Info("start initialize LSP server %s...\n", lspPath)
- client, err = lsp.NewLSPClient(repoPath, openfile, opentime, lsp.ClientOptions{
- Server: lspPath,
- Language: l,
- Verbose: flagVerbose || flagDebug,
- })
- if err != nil {
- log.Error("failed to initialize LSP server: %v\n", err)
- os.Exit(2)
- }
- log.Info("end initialize LSP server")
- },
-
- Run: func(cmd *cobra.Command, args []string) {
- action := args[0]
- log.Info("start %s repository %s...\n", action, repoPath)
- // Perform the action
- ctx := context.Background()
- switch action {
- case "collect":
- if excludes != nil {
- opts.Excludes = *excludes
- }
- opts.Language = lang
- repo, err := collectSymbol(ctx, client, repoPath, opts)
- if err != nil {
- log.Error("Failed to collect symbols: %v\n", err)
- os.Exit(3)
- }
- log.Info("all symbols collected, start writing to stdout...\n")
- out, err := json.Marshal(repo)
- if err != nil {
- log.Error("Failed to marshal repository: %v\n", err)
- return
- }
- for n := 0; n < len(out); {
- i, err := os.Stdout.Write(out)
- if err != nil {
- log.Error("Failed to write to stdout: %v\n", err)
- return
- }
- n += i
- }
- return
- default:
- log.Error("Unsupported action: %s\n", action)
- os.Exit(1)
- }
- },
- }
-
- rootCmd.Flags().StringVar(&flagLsp, "lsp", "", "Specify the language server path.")
- rootCmd.Flags().BoolVarP(&flagVerbose, "verbose", "v", false, "Verbose mode.")
- rootCmd.Flags().BoolVarP(&flagDebug, "debug", "d", false, "Debug mode.")
- rootCmd.Flags().BoolVarP(&opts.LoadExternalSymbol, "load-external-symbol", "", false, "load external symbols into results")
- excludes = rootCmd.Flags().StringSlice("exclude", []string{}, "exclude files or directories")
- rootCmd.Flags().BoolVarP(&opts.NoNeedComment, "no-need-comment", "", false, "do not need comment (only works for Go now)")
- rootCmd.Flags().BoolVarP(&opts.NeedTest, "need-test", "", false, "need parse test files (only works for Go now)")
-
- // Execute the command
- if err := rootCmd.Execute(); err != nil {
- log.Error("Failed to execute command: %v\n", err)
- os.Exit(1)
- }
-}
-
-func checkRepoPath(repoPath string, language lsp.Language) (openfile string, wait time.Duration) {
- if _, err := os.Stat(repoPath); os.IsNotExist(err) {
- log.Error("Repository not found: %s\n", repoPath)
- os.Exit(1)
- }
- switch language {
- case lsp.Rust:
- // NOTICE: open the Cargo.toml file is required for Rust projects
- openfile, wait = rust.CheckRepo(repoPath)
- default:
- openfile = ""
- wait = 0
- }
-
- log.Info("open file '%s' and wait for %d seconds for initialize workspace\n", openfile, wait/time.Second)
- return
-}
-
-func checkVerbose(verbose bool, debug bool) {
- if debug {
- log.SetLogLevel(log.DebugLevel)
- } else if verbose {
- log.SetLogLevel(log.InfoLevel)
- } else {
- log.SetLogLevel(log.ErrorLevel)
- }
-}
-
-func checkLSP(language string, lspPath string) (l lsp.Language, s string) {
- switch language {
- case "rust":
- l, s = rust.GetDefaultLSP()
- case "golang", "go":
- l = lsp.Golang
- s = ""
- if _, err := exec.LookPath("go"); err != nil {
- if _, err := os.Stat(lspPath); os.IsNotExist(err) {
- log.Error("Go compiler not found, please make it excutable!\n", lspPath)
- os.Exit(1)
- }
- }
- return
- default:
- log.Error("Unsupported language: %s\n", language)
- os.Exit(1)
- }
- // check if lsp excutable
- if lspPath != "" {
- if _, err := exec.LookPath(lspPath); err != nil {
- if _, err := os.Stat(lspPath); os.IsNotExist(err) {
- log.Error("Language server %s not found, please make it excutable!\n", lspPath)
- os.Exit(1)
- }
- }
- s = lspPath
- }
-
- return
-}
-
-func collectSymbol(ctx context.Context, cli *lsp.LSPClient, repoPath string, opts collect.CollectOption) (repo *uniast.Repository, err error) {
- if opts.Language == lsp.Golang {
- repo, err = callGoParser(ctx, repoPath, opts)
- if err != nil {
- return nil, err
- }
- } else {
- collector := collect.NewCollector(repoPath, cli)
- collector.CollectOption = opts
- log.Info("start collecting symbols...\n")
- err = collector.Collect(ctx)
- if err != nil {
- return nil, err
- }
- log.Info("all symbols collected.\n")
- log.Info("start exporting symbols...\n")
- repo, err = collector.Export(ctx)
- if err != nil {
- return nil, err
- }
- }
-
- if err := repo.BuildGraph(); err != nil {
- return nil, err
- }
- return repo, nil
-}
diff --git a/src/lib.rs b/src/lib.rs
deleted file mode 100644
index 7cde95a5..00000000
--- a/src/lib.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-pub mod compress;
-pub mod config;
-pub mod export;
-pub mod parse;
-pub mod storage;
-pub mod utils;
diff --git a/src/parse.rs b/src/parse.rs
deleted file mode 100644
index 079ad503..00000000
--- a/src/parse.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Copyright 2025 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-use std::path::{Path, PathBuf};
-
-use crate::{
- compress::{compress, types::types::Repository},
- config::{self, CONFIG},
- storage::cache,
- utils::{cmd, errors::Error, git, split},
-};
-
-#[derive(Clone, Debug, Default)]
-pub struct CompressOptions {
- pub parse_only: bool,
- pub not_load_external_symbol: bool,
- pub no_need_comment: bool,
- pub force_update_ast: bool,
-}
-
-pub fn force_parse_repo(repo_path: &String, opts: &CompressOptions) -> Result {
- let path = parse_repo_path(repo_path)?;
- // force to parse the repo
- let data = parse_repo(&path, opts)?;
- match compress::from_json(&repo_path, String::from_utf8(data).unwrap().as_str()) {
- Ok(repo) => Ok(repo),
- Err(err) => Err(Error::Parse(err.to_string())),
- }
-}
-
-fn parse_repo_path(repo_path: &String) -> Result {
- let git_dir = Path::new(CONFIG.work_dir.as_str());
-
- let ps: Vec<&str> = repo_path.split('/').collect();
- let path = if repo_path.ends_with(".git") || repo_path.starts_with("https://") {
- // url
- let repo_name = ps[ps.len() - 1].strip_suffix(".git").unwrap();
- let path = git_dir.join(repo_name);
- if !path.exists() {
- // git clone
- git::git_clone(&repo_path, &path).expect("Failed to clone repo");
- }
- path
- } else {
- // existing path
- let path = git_dir.join(&repo_path);
- if !path.exists() {
- println!("path not exists: {:?}", path);
- return Err(Error::GitCloneError("path not exists".to_string()));
- }
- // directly use the repo name
- path
- };
- Ok(path)
-}
-
-pub fn get_repo(repo_path: &String, opts: &CompressOptions) -> Result {
- let path = parse_repo_path(repo_path)?;
-
- // check if cache the result
- let data = if let Some(data) = cache::get_cache().get(&path.to_str().unwrap()) {
- data
- } else {
- // parse the repo
- parse_repo(&path, opts)?
- };
-
- match compress::from_json(
- &path.to_str().unwrap(),
- String::from_utf8(data).unwrap().as_str(),
- ) {
- Ok(repo) => Ok(repo),
- Err(err) => Err(Error::Parse(err.to_string())),
- }
-}
-
-fn parse_repo(path: &Path, opts: &CompressOptions) -> Result, Error> {
- let (parser, args) = config::parser_and_args(path.to_str().unwrap(), opts);
- // parse the repo by parse
- match cmd::run_command_bytes(&parser, args) {
- Ok(output) => {
- cache::get_cache()
- .put(&path.to_str().unwrap(), output.clone())
- .unwrap();
- return Ok(output);
- }
- Err(err) => {
- println!(
- "plugin parse repo {} error: {}",
- path.to_str().unwrap(),
- err.to_string()
- );
- return Err(Error::Parse(err.to_string()));
- }
- }
-}
diff --git a/src/storage/cache.rs b/src/storage/cache.rs
deleted file mode 100644
index 3f71c777..00000000
--- a/src/storage/cache.rs
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::error::Error;
-use std::path::Path;
-
-use crate::compress::compress;
-use crate::compress::types::types::Repository;
-use crate::config::CONFIG;
-use crate::storage::fs::FileStorage;
-use crate::storage::memory::MemoryCache;
-
-pub trait StorageEngine: Send {
- fn get(&mut self, key: &str) -> Option>;
- fn put(&mut self, key: &str, value: Vec) -> Result<(), Box>;
-}
-
-pub struct CachingStorageEngine {
- cache: C,
- backend: B,
-}
-
-impl CachingStorageEngine {
- pub fn new(cache: C, backend: B) -> Self {
- CachingStorageEngine { cache, backend }
- }
-}
-
-impl StorageEngine for CachingStorageEngine {
- fn get(&mut self, key: &str) -> Option> {
- if let Some(value) = self.cache.get(key) {
- Some(value)
- } else {
- if let Some(value) = self.backend.get(key) {
- let _ = self.cache.put(key, value.clone());
- Some(value)
- } else {
- None
- }
- }
- }
-
- fn put(&mut self, key: &str, value: Vec) -> Result<(), Box> {
- self.cache.put(key, value.clone())?;
- self.backend.put(key, value)
- }
-}
-
-pub fn get_cache() -> Box {
- let mem = MemoryCache::new(12);
- let fs = FileStorage::new(Path::new(CONFIG.work_dir.as_str())).unwrap();
-
- let mut cache = CachingStorageEngine::new(mem, fs);
- return Box::new(cache);
-}
-
-pub fn load_repo(cache: &mut Box, repo_name: &str) -> Option> {
- if let Some(repo) = cache.get(repo_name) {
- if let Ok(repo) = String::from_utf8(repo) {
- if let Ok(repo) = compress::from_json(repo_name, &repo) {
- let repo = Box::new(repo);
- return Some(repo);
- }
- }
- }
-
- None
-}
diff --git a/src/storage/fs.rs b/src/storage/fs.rs
deleted file mode 100644
index 394132fb..00000000
--- a/src/storage/fs.rs
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::error::Error;
-use std::fs::File;
-use std::io::{Read, Write};
-use std::ops::Add;
-use std::path::{Path, PathBuf};
-
-use crate::storage::cache::StorageEngine;
-
-pub struct FileStorage {
- base_dir: PathBuf,
-}
-
-impl FileStorage {
- pub fn new(base_dir: &Path) -> Result> {
- if !base_dir.exists() {
- std::fs::create_dir_all(base_dir)?;
- }
- Ok(FileStorage {
- base_dir: base_dir.to_path_buf(),
- })
- }
-}
-
-impl StorageEngine for FileStorage {
- fn get(&mut self, key: &str) -> Option> {
- let mut path = self.base_dir.clone();
- let safe_key = key.replace("/", "_");
- let json_key = safe_key.add(".json");
- path.push(json_key);
- println!("read file cached: {:?}", path);
- let mut file = match File::open(&path) {
- Ok(file) => file,
- Err(_) => {
- println!("not cached: {:?}", path);
- return None;
- }
- };
- let mut contents = Vec::new();
- match file.read_to_end(&mut contents) {
- Ok(_) => Some(contents),
- Err(_) => None,
- }
- }
-
- fn put(&mut self, key: &str, value: Vec) -> Result<(), Box> {
- let mut path = self.base_dir.clone();
- let safe_key = key.replace("/", "_");
- let json_key = safe_key.add(".json");
- path.push(json_key);
- let mut file = File::create(path)?;
- file.write_all(&value)?;
- Ok(())
- }
-}
diff --git a/src/storage/memory.rs b/src/storage/memory.rs
deleted file mode 100644
index 36c8ae19..00000000
--- a/src/storage/memory.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::HashMap;
-use std::error::Error;
-
-use crate::storage::cache::StorageEngine;
-
-pub struct MemoryCache {
- map: HashMap>,
-}
-
-impl MemoryCache {
- pub fn new(cache_size: usize) -> Self {
- MemoryCache { map: HashMap::with_capacity(cache_size) }
- }
-}
-
-impl StorageEngine for MemoryCache {
- fn get(&mut self, key: &str) -> Option> {
- self.map.get(key).cloned()
- }
-
- fn put(&mut self, key: &str, value: Vec) -> Result<(), Box> {
- self.map.insert(key.to_string(), value);
- Ok(())
- }
-}
\ No newline at end of file
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
deleted file mode 100644
index 7bb4763c..00000000
--- a/src/storage/mod.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-pub mod cache;
-pub mod fs;
-pub mod memory;
diff --git a/src/utils/cmd.rs b/src/utils/cmd.rs
deleted file mode 100644
index 168e0c5f..00000000
--- a/src/utils/cmd.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::io::{self, Error};
-use std::process::Command;
-use std::{env, os};
-
-pub fn run_command(cmd: &str, args: Vec<&str>) -> Result {
- println!("execute command: {} {:?}", cmd, args);
- // get current directory
- let output = Command::new(cmd).args(args).output()?;
-
- match output.status.success() {
- true => Ok(String::from_utf8_lossy(&output.stdout).into_owned()),
- false => Err(io::Error::new(
- io::ErrorKind::Other,
- String::from_utf8_lossy(vec![output.stdout, output.stderr].concat().as_slice())
- .into_owned(),
- )),
- }
-}
-
-pub fn run_command_bytes(cmd: &str, args: Vec) -> Result, Error> {
- println!("execute command: {} {:?}", cmd, args);
- let output = Command::new(cmd).args(args).output()?;
-
- match output.status.success() {
- true => Ok(output.stdout),
- false => Err(io::Error::new(
- io::ErrorKind::Other,
- String::from_utf8_lossy(vec![output.stdout, output.stderr].concat().as_slice())
- .into_owned(),
- )),
- }
-}
diff --git a/src/utils/errors.rs b/src/utils/errors.rs
deleted file mode 100644
index 68d2e1e1..00000000
--- a/src/utils/errors.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2025 CloudWeGo Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Import necessary types from the standard library
-use std::process::Output;
-use std::{fmt, io, num::ParseIntError};
-
-// Our custom error type that can encompass various types of errors
-// that may occur in our application
-pub enum Error {
- Io(io::Error),
- GitCloneError(String),
- Parse(String),
- // more error types can be added here as needed by the application...
-}
-
-// Implement the Display trait for our Error type, so we can print the error messages
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match self {
- Error::Io(ref err) => write!(f, "IO error: {}", err),
- Error::GitCloneError(message) => write!(f, "Git clone error: {}", message),
- Error::Parse(ref err) => write!(f, "Parse error: {}", err),
- // ...
- }
- }
-}
-
-// We also implement the Debug trait, here we elect to delegate to the Display implementation
-impl fmt::Debug for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- fmt::Display::fmt(self, f)
- }
-}
-
-// We implement conversion from io:Error into our custom type
-impl From for Error {
- fn from(err: io::Error) -> Error {
- Error::Io(err)
- }
-}
-
-// We implement conversion from Output into our custom type
-impl From