diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 5f9c670bf8..c5216cbe3a 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -75,7 +75,7 @@ impl HttpClient { Ok(temp_file) } - /// Upload a temporary file and return the temp_path + /// Upload a temporary file and return the temp_file_id async fn upload_temp_file(&self, file_path: &Path) -> Result { let url = format!("{}/api/v1/resources/temp_upload", self.base_url); let file_name = file_path @@ -110,10 +110,10 @@ impl HttpClient { let result: Value = self.handle_response(response).await?; result - .get("temp_path") + .get("temp_file_id") .and_then(|v| v.as_str()) .map(|s| s.to_string()) - .ok_or_else(|| Error::Parse("Missing temp_path in response".to_string())) + .ok_or_else(|| Error::Parse("Missing temp_file_id in response".to_string())) } fn build_headers(&self) -> reqwest::header::HeaderMap { @@ -536,10 +536,10 @@ impl HttpClient { if path_obj.exists() { if path_obj.is_dir() { let zip_file = self.zip_directory(path_obj)?; - let temp_path = self.upload_temp_file(zip_file.path()).await?; + let temp_file_id = self.upload_temp_file(zip_file.path()).await?; let body = serde_json::json!({ - "temp_path": temp_path, + "temp_file_id": temp_file_id, "to": to, "parent": parent, "reason": reason, @@ -556,10 +556,10 @@ impl HttpClient { self.post("/api/v1/resources", &body).await } else if path_obj.is_file() { - let temp_path = self.upload_temp_file(path_obj).await?; + let temp_file_id = self.upload_temp_file(path_obj).await?; let body = serde_json::json!({ - "temp_path": temp_path, + "temp_file_id": temp_file_id, "to": to, "parent": parent, "reason": reason, @@ -626,19 +626,19 @@ impl HttpClient { if path_obj.exists() { if path_obj.is_dir() { let zip_file = self.zip_directory(path_obj)?; - let temp_path = self.upload_temp_file(zip_file.path()).await?; + let temp_file_id = self.upload_temp_file(zip_file.path()).await?; let body = serde_json::json!({ - "temp_path": 
temp_path, + "temp_file_id": temp_file_id, "wait": wait, "timeout": timeout, }); self.post("/api/v1/skills", &body).await } else if path_obj.is_file() { - let temp_path = self.upload_temp_file(path_obj).await?; + let temp_file_id = self.upload_temp_file(path_obj).await?; let body = serde_json::json!({ - "temp_path": temp_path, + "temp_file_id": temp_file_id, "wait": wait, "timeout": timeout, }); @@ -707,8 +707,24 @@ impl HttpClient { force: bool, vectorize: bool, ) -> Result { + let file_path_obj = Path::new(file_path); + + if !file_path_obj.exists() { + return Err(Error::Client(format!( + "Local ovpack file not found: {}", + file_path + ))); + } + if !file_path_obj.is_file() { + return Err(Error::Client(format!( + "Path is not a file: {}", + file_path + ))); + } + + let temp_file_id = self.upload_temp_file(file_path_obj).await?; let body = serde_json::json!({ - "file_path": file_path, + "temp_file_id": temp_file_id, "parent": parent, "force": force, "vectorize": vectorize, diff --git a/docs/en/api/01-overview.md b/docs/en/api/01-overview.md index f9bf74f877..80da5d4254 100644 --- a/docs/en/api/01-overview.md +++ b/docs/en/api/01-overview.md @@ -91,6 +91,13 @@ export OPENVIKING_CLI_CONFIG_FILE=/path/to/ovcli.conf See the [Configuration Guide](../guides/01-configuration.md#ovcliconf) for details. +**Local files in HTTP mode** + +- CLI, `SyncHTTPClient`, and `AsyncHTTPClient` automatically upload local files and directories before calling the server API. +- Raw HTTP callers do not get this convenience layer. When using `curl` or another HTTP client, upload the file with `POST /api/v1/resources/temp_upload` first, then call the target API with the returned `temp_file_id`. +- For local directories in raw HTTP mode, zip the directory first and upload the `.zip` file; the server does not accept direct host directory paths. +- `POST /api/v1/resources` accepts remote URLs directly, but does not accept direct host filesystem paths such as `./doc.md` or `/tmp/doc.md`. 
+ ### Direct HTTP (curl) ```bash diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 6d41c17924..409a96d13d 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -40,7 +40,8 @@ Add a resource to the knowledge base. | Parameter | Type | Required | Default | Description | |-----------|------|----------|---------|-------------| -| path | str | Yes | - | Local file path, directory path, or URL | +| path | str | Yes | - | SDK/CLI: local path, directory path, or URL. Raw HTTP: remote URL only | +| temp_file_id | str | No | None | Upload ID returned by `POST /api/v1/resources/temp_upload` for raw HTTP local file ingestion | | target | str | No | None | Target Viking URI (must be in `resources` scope) | | reason | str | No | "" | Why this resource is being added (improves search relevance) | | instruction | str | No | "" | Special processing instructions | @@ -48,6 +49,15 @@ Add a resource to the knowledge base. | timeout | float | No | None | Timeout in seconds (only used when wait=True) | | watch_interval | float | No | 0 | Watch interval (minutes). >0 enables/updates watch; <=0 disables watch. Only takes effect when target is provided | +**How local files and directories work** + +- Python SDK and CLI accept local file and directory paths directly. In HTTP mode they automatically upload local files before calling the server API. +- Raw HTTP callers should think in three categories: + - Remote source: pass `path` directly, for example `https://example.com/doc.pdf` + - Local file: call `POST /api/v1/resources/temp_upload` first, then pass the returned `temp_file_id` + - Local directory: zip it first, upload the `.zip` file, then pass the returned `temp_file_id` +- `POST /api/v1/resources` does not accept direct host filesystem paths such as `./guide.md`, `/tmp/guide.md`, or `/tmp/my-dir/`. 
+ **Incremental Updates** When you call `add_resource()` repeatedly for the same resource URI, the system performs an incremental update instead of rebuilding everything from scratch: @@ -83,7 +93,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ -d '{ - "path": "./documents/guide.md", + "path": "https://example.com/guide.md", "reason": "User guide documentation" }' ``` @@ -142,6 +152,58 @@ curl -X POST http://localhost:1933/api/v1/resources \ openviking add-resource https://example.com/api-docs.md --to viking://resources/external/ --reason "External API documentation" ``` +**Example: Add a Local File with Raw HTTP** + +When you call the HTTP API directly, upload local files first and then use `temp_file_id`. + +```bash +# Step 1: upload the local file +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@./documents/guide.md' \ + | jq -r '.result.temp_file_id' +) + +# Step 2: add the uploaded file +curl -X POST http://localhost:1933/api/v1/resources \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\", + \"reason\": \"User guide documentation\", + \"wait\": true + }" +``` + +**Example: Add a Local Directory with Raw HTTP** + +When you call the HTTP API directly, zip the directory yourself first. CLI and SDK do this automatically for you. 
+ +```bash +# Step 1: zip the local directory +cd ./documents +zip -r /tmp/guide.zip ./guide + +# Step 2: upload the zip file +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@/tmp/guide.zip' \ + | jq -r '.result.temp_file_id' +) + +# Step 3: add the uploaded directory archive +curl -X POST http://localhost:1933/api/v1/resources \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\", + \"reason\": \"Import local directory\", + \"wait\": true + }" +``` + **Example: Add Feishu/Lark Cloud Documents** [Feishu](https://www.feishu.cn) (飞书) and its international version [Lark](https://www.larksuite.com) are widely used for documentation in Chinese tech companies. OpenViking can directly import cloud documents by URL. @@ -218,7 +280,7 @@ print(f"All processed: {status}") curl -X POST http://localhost:1933/api/v1/resources \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ - -d '{"path": "./documents/guide.md", "wait": true}' + -d '{"path": "https://example.com/guide.md", "wait": true}' # Wait separately after batch curl -X POST http://localhost:1933/api/v1/system/wait \ @@ -260,7 +322,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ -d '{ - "path": "./documents/guide.md", + "path": "https://example.com/guide.md", "target": "viking://resources/documents/guide.md", "watch_interval": 60 }' @@ -338,7 +400,7 @@ openviking export viking://resources/my-project/ ./exports/my-project.ovpack Import a `.ovpack` file. -**Parameters** +**SDK / CLI parameters** | Parameter | Type | Required | Default | Description | |-----------|------|----------|---------|-------------| @@ -347,6 +409,15 @@ Import a `.ovpack` file. 
| force | bool | No | False | Overwrite existing resources | | vectorize | bool | No | True | Trigger vectorization after import | +**Raw HTTP request body** + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| temp_file_id | str | Yes | - | Upload ID returned by `POST /api/v1/resources/temp_upload` | +| parent | str | Yes | - | Target parent URI | +| force | bool | No | False | Overwrite existing resources | +| vectorize | bool | No | True | Trigger vectorization after import | + **Python SDK (Embedded / HTTP)** ```python @@ -368,15 +439,24 @@ POST /api/v1/pack/import ``` ```bash +# Step 1: upload the local ovpack file +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@./exports/my-project.ovpack' \ + | jq -r '.result.temp_file_id' +) + +# Step 2: import using temp_file_id curl -X POST http://localhost:1933/api/v1/pack/import \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ - -d '{ - "file_path": "./exports/my-project.ovpack", - "parent": "viking://resources/imported/", - "force": true, - "vectorize": true - }' + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\", + \"parent\": \"viking://resources/imported/\", + \"force\": true, + \"vectorize\": true + }" ``` **CLI** diff --git a/docs/en/api/04-skills.md b/docs/en/api/04-skills.md index 9d03608543..24938ca7a5 100644 --- a/docs/en/api/04-skills.md +++ b/docs/en/api/04-skills.md @@ -12,10 +12,21 @@ Add a skill to the knowledge base. | Parameter | Type | Required | Default | Description | |-----------|------|----------|---------|-------------| -| data | Any | Yes | - | Skill data (dict, string, or path) | +| data | Any | Yes | - | Skill data. 
Raw HTTP accepts structured data or raw `SKILL.md` content, not direct host paths | +| temp_file_id | str | No | None | Upload ID returned by `POST /api/v1/resources/temp_upload` for raw HTTP local file ingestion | | wait | bool | No | False | Wait for vectorization to complete | | timeout | float | No | None | Timeout in seconds | +**How local skill files work** + +- Python SDK and CLI accept local `SKILL.md` files or directories directly. In HTTP mode they automatically upload local files before calling the server API. +- Raw HTTP callers should use one of the following approaches: + - send structured skill data directly in `data` + - send raw `SKILL.md` content in `data` + - upload a local `SKILL.md` file first with `POST /api/v1/resources/temp_upload`, then call `POST /api/v1/skills` with `temp_file_id` + - zip a local skill directory first, upload the `.zip` file, then call `POST /api/v1/skills` with `temp_file_id` +- `POST /api/v1/skills` does not accept direct host filesystem paths in `data`. + **Supported Data Formats** 1. **Dict (Skill format)**: @@ -185,14 +196,25 @@ print(f"Auxiliary files: {result['auxiliary_files']}") **HTTP API** ```bash +# Step 1: upload the local SKILL.md file +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@./skills/search-web/SKILL.md' \ + | jq -r '.result.temp_file_id' +) + +# Step 2: add the uploaded skill curl -X POST http://localhost:1933/api/v1/skills \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ -d '{ - "data": "./skills/search-web/SKILL.md" - }' + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\" + }" ``` +For a local skill directory, zip the directory first, upload the `.zip` file, then call the same `POST /api/v1/skills` request with the returned `temp_file_id`. 
+ --- ## SKILL.md Format diff --git a/docs/en/getting-started/03-quickstart-server.md b/docs/en/getting-started/03-quickstart-server.md index 035090f3f5..6fb49012ae 100644 --- a/docs/en/getting-started/03-quickstart-server.md +++ b/docs/en/getting-started/03-quickstart-server.md @@ -115,6 +115,8 @@ export OPENVIKING_CLI_CONFIG_FILE=/path/to/ovcli.conf ## Connect with curl +Use direct `path` for remote URLs. For local files, upload first with `POST /api/v1/resources/temp_upload`, then call the target API with the returned `temp_file_id`. For local directories in raw HTTP mode, zip the directory first and upload the `.zip` file. + ```bash # Add a resource curl -X POST http://localhost:1933/api/v1/resources \ diff --git a/docs/zh/api/01-overview.md b/docs/zh/api/01-overview.md index 48675085ac..7912591f6f 100644 --- a/docs/zh/api/01-overview.md +++ b/docs/zh/api/01-overview.md @@ -91,6 +91,13 @@ export OPENVIKING_CLI_CONFIG_FILE=/path/to/ovcli.conf 详见 [配置指南](../guides/01-configuration.md#ovcliconf)。 +**HTTP 模式下的本地文件** + +- CLI、`SyncHTTPClient`、`AsyncHTTPClient` 遇到本地文件或目录时,会先自动上传,再调用服务端 API。 +- 裸 HTTP 调用没有这层封装。使用 `curl` 或其他 HTTP 客户端时,需要先调用 `POST /api/v1/resources/temp_upload`,再把返回的 `temp_file_id` 传给目标 API。 +- 裸 HTTP 如果导入本地目录,需要先自行打成 `.zip` 再上传;服务端不接受直接传宿主机目录路径。 +- `POST /api/v1/resources` 可以直接接收远端 URL,但不接受 `./doc.md`、`/tmp/doc.md` 这类宿主机本地路径。 + ### 直接 HTTP(curl) ```bash diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 548460d4da..04a9c184dc 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -40,7 +40,8 @@ Input -> Parser -> TreeBuilder -> AGFS -> SemanticQueue -> Vector Index | 参数 | 类型 | 必填 | 默认值 | 说明 | |------|------|------|--------|------| -| path | str | 是 | - | 本地文件路径、目录路径或 URL | +| path | str | 是 | - | SDK/CLI 可传本地路径、目录路径或 URL;裸 HTTP 仅支持远端 URL | +| temp_file_id | str | 否 | None | `POST /api/v1/resources/temp_upload` 返回的上传 ID,用于裸 HTTP 导入本地文件 | | target | str | 否 | None | 目标 Viking URI(必须在 `resources` 作用域内) | | 
reason | str | 否 | "" | 添加该资源的原因(可提升搜索相关性) | | instruction | str | 否 | "" | 特殊处理指令 | @@ -48,6 +49,15 @@ Input -> Parser -> TreeBuilder -> AGFS -> SemanticQueue -> Vector Index | timeout | float | 否 | None | 超时时间(秒),仅在 wait=True 时生效 | | watch_interval | float | 否 | 0 | 定时更新间隔(分钟)。>0 开启/更新定时任务;<=0 关闭(停用)定时任务。仅在指定 target 时生效 | +**本地文件和目录如何处理** + +- Python SDK 和 CLI 可以直接接收本地文件和目录路径。处于 HTTP 模式时,它们会先自动上传,再调用服务端 API。 +- 裸 HTTP 调用可以按三类理解: + - 远端资源:直接传 `path`,例如 `https://example.com/doc.pdf` + - 本地文件:先调用 `POST /api/v1/resources/temp_upload`,再把返回的 `temp_file_id` 传给目标 API + - 本地目录:先自行打成 `.zip`,上传该压缩包,再把返回的 `temp_file_id` 传给目标 API +- `POST /api/v1/resources` 不接受 `./guide.md`、`/tmp/guide.md`、`/tmp/my-dir/` 这类宿主机本地路径。 + **增量更新(Incremental Update)** 当你为同一个资源 URI 反复调用 `add_resource()` 时,系统会走“增量更新”而不是每次全量重建: @@ -83,7 +93,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ -d '{ - "path": "./documents/guide.md", + "path": "https://example.com/guide.md", "reason": "User guide documentation" }' ``` @@ -142,6 +152,58 @@ curl -X POST http://localhost:1933/api/v1/resources \ openviking add-resource https://example.com/api-docs.md --to viking://resources/external/ --reason "External API documentation" ``` +**示例:用裸 HTTP 添加本地文件** + +如果你直接调用 HTTP API,本地文件要先上传,再使用 `temp_file_id`。 + +```bash +# 第一步:上传本地文件 +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@./documents/guide.md' \ + | jq -r '.result.temp_file_id' +) + +# 第二步:用 temp_file_id 添加资源 +curl -X POST http://localhost:1933/api/v1/resources \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\", + \"reason\": \"User guide documentation\", + \"wait\": true + }" +``` + +**示例:用裸 HTTP 添加本地目录** + +如果你直接调用 HTTP API,本地目录需要先自行打成 zip。CLI 和 SDK 会自动完成这一步。 + +```bash +# 第一步:先把本地目录打成 zip +cd ./documents +zip -r /tmp/guide.zip ./guide + +# 
第二步:上传 zip 文件 +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@/tmp/guide.zip' \ + | jq -r '.result.temp_file_id' +) + +# 第三步:用上传后的目录压缩包添加资源 +curl -X POST http://localhost:1933/api/v1/resources \ + -H "Content-Type: application/json" \ + -H "X-API-Key: your-key" \ + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\", + \"reason\": \"Import local directory\", + \"wait\": true + }" +``` + **示例:添加飞书/Lark 云端文档** [飞书](https://www.feishu.cn)及其国际版 [Lark](https://www.larksuite.com) 是国内科技公司广泛使用的协作平台。OpenViking 可以通过 URL 直接导入飞书云端文档。 @@ -218,7 +280,7 @@ print(f"All processed: {status}") curl -X POST http://localhost:1933/api/v1/resources \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ - -d '{"path": "./documents/guide.md", "wait": true}' + -d '{"path": "https://example.com/guide.md", "wait": true}' # 批量添加后单独等待 curl -X POST http://localhost:1933/api/v1/system/wait \ @@ -260,7 +322,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ -d '{ - "path": "./documents/guide.md", + "path": "https://example.com/guide.md", "target": "viking://resources/documents/guide.md", "watch_interval": 60 }' @@ -338,7 +400,7 @@ openviking export viking://resources/my-project/ ./exports/my-project.ovpack 导入 `.ovpack` 文件。 -**参数** +**SDK / CLI 参数** | 参数 | 类型 | 必填 | 默认值 | 说明 | |------|------|------|--------|------| @@ -347,6 +409,15 @@ openviking export viking://resources/my-project/ ./exports/my-project.ovpack | force | bool | 否 | False | 覆盖已有资源 | | vectorize | bool | 否 | True | 导入后触发向量化 | +**裸 HTTP 请求体** + +| 参数 | 类型 | 必填 | 默认值 | 说明 | +|------|------|------|--------|------| +| temp_file_id | str | 是 | - | `POST /api/v1/resources/temp_upload` 返回的上传 ID | +| parent | str | 是 | - | 目标父级 URI | +| force | bool | 否 | False | 覆盖已有资源 | +| vectorize | bool | 否 | True | 导入后触发向量化 | + **Python SDK (Embedded / HTTP)** ```python @@ -368,15 +439,24 @@ POST 
/api/v1/pack/import ``` ```bash +# 第一步:上传本地 ovpack 文件 +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@./exports/my-project.ovpack' \ + | jq -r '.result.temp_file_id' +) + +# 第二步:使用 temp_file_id 导入 curl -X POST http://localhost:1933/api/v1/pack/import \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ - -d '{ - "file_path": "./exports/my-project.ovpack", - "parent": "viking://resources/imported/", - "force": true, - "vectorize": true - }' + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\", + \"parent\": \"viking://resources/imported/\", + \"force\": true, + \"vectorize\": true + }" ``` **CLI** diff --git a/docs/zh/api/04-skills.md b/docs/zh/api/04-skills.md index 9a0f32593a..bcf5ced2b7 100644 --- a/docs/zh/api/04-skills.md +++ b/docs/zh/api/04-skills.md @@ -12,10 +12,21 @@ | 参数 | 类型 | 必填 | 默认值 | 说明 | |------|------|------|--------|------| -| data | Any | 是 | - | 技能数据(字典、字符串或路径) | +| data | Any | 是 | - | 技能数据。裸 HTTP 支持结构化数据或原始 `SKILL.md` 内容,不支持直接传宿主机路径 | +| temp_file_id | str | 否 | None | `POST /api/v1/resources/temp_upload` 返回的上传 ID,用于裸 HTTP 导入本地文件 | | wait | bool | 否 | False | 等待向量化完成 | | timeout | float | 否 | None | 超时时间(秒) | +**本地技能文件如何处理** + +- Python SDK 和 CLI 可以直接接收本地 `SKILL.md` 文件或目录。处于 HTTP 模式时,它们会先自动上传,再调用服务端 API。 +- 裸 HTTP 调用有四种推荐方式: + - 在 `data` 中直接传结构化 skill 数据 + - 在 `data` 中直接传原始 `SKILL.md` 内容 + - 先调用 `POST /api/v1/resources/temp_upload` 上传本地 `SKILL.md` 文件,再调用 `POST /api/v1/skills` 并传入 `temp_file_id` + - 先把本地 skill 目录打成 `.zip`,上传该压缩包,再调用 `POST /api/v1/skills` 并传入 `temp_file_id` +- `POST /api/v1/skills` 不接受在 `data` 中直接传宿主机本地路径。 + **支持的数据格式** 1. 
**字典(技能格式)**: @@ -185,14 +196,25 @@ print(f"Auxiliary files: {result['auxiliary_files']}") **HTTP API** ```bash +# 第一步:上传本地 SKILL.md 文件 +TEMP_FILE_ID=$( + curl -sS -X POST http://localhost:1933/api/v1/resources/temp_upload \ + -H "X-API-Key: your-key" \ + -F 'file=@./skills/search-web/SKILL.md' \ + | jq -r '.result.temp_file_id' +) + +# 第二步:添加上传后的技能文件 curl -X POST http://localhost:1933/api/v1/skills \ -H "Content-Type: application/json" \ -H "X-API-Key: your-key" \ - -d '{ - "data": "./skills/search-web/SKILL.md" - }' + -d "{ + \"temp_file_id\": \"$TEMP_FILE_ID\" + }" ``` +如果是本地 skill 目录,先把目录打成 `.zip`,上传该压缩包,再用返回的 `temp_file_id` 调用同一个 `POST /api/v1/skills` 请求即可。 + --- ## SKILL.md 格式 diff --git a/docs/zh/getting-started/03-quickstart-server.md b/docs/zh/getting-started/03-quickstart-server.md index f06687df53..9fcdf9f13a 100644 --- a/docs/zh/getting-started/03-quickstart-server.md +++ b/docs/zh/getting-started/03-quickstart-server.md @@ -114,6 +114,8 @@ export OPENVIKING_CLI_CONFIG_FILE=/path/to/ovcli.conf ## 使用 curl 连接 +远端 URL 可以直接放在 `path` 里。本地文件需要先调用 `POST /api/v1/resources/temp_upload` 上传,再用返回的 `temp_file_id` 调目标 API。裸 HTTP 如果导入本地目录,需要先把目录打成 `.zip` 再上传。 + ```bash # Add a resource curl -X POST http://localhost:1933/api/v1/resources \ diff --git a/openviking/console/static/app.js b/openviking/console/static/app.js index f59656cf00..1f12de44a0 100644 --- a/openviking/console/static/app.js +++ b/openviking/console/static/app.js @@ -1719,16 +1719,16 @@ function bindAddResource() { method: "POST", body: formData, }); - const tempPath = uploadPayload.result?.temp_path; - if (!tempPath) { - throw new Error("Temp upload did not return temp_path."); + const tempFileId = uploadPayload.result?.temp_file_id; + if (!tempFileId) { + throw new Error("Temp upload did not return temp_file_id."); } const addPayload = await callConsole("/ov/resources", { method: "POST", body: JSON.stringify({ ...buildAddResourcePayload(), - temp_path: tempPath, + temp_file_id: tempFileId, }), }); 
diff --git a/openviking/server/local_input_guard.py b/openviking/server/local_input_guard.py new file mode 100644 index 0000000000..9e490b46f2 --- /dev/null +++ b/openviking/server/local_input_guard.py @@ -0,0 +1,91 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 +"""Guards for local-path handling on the HTTP server.""" + +from __future__ import annotations + +import re +from pathlib import Path + +from openviking_cli.exceptions import PermissionDeniedError + +_WINDOWS_DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]") +_REMOTE_SOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://") + + +def is_remote_resource_source(source: str) -> bool: + """Return True if *source* is a remotely fetchable resource location.""" + return source.startswith(_REMOTE_SOURCE_PREFIXES) + + +def looks_like_local_path(value: str) -> bool: + """Return True for strings that clearly look like filesystem paths.""" + if not value or "\n" in value or "\r" in value: + return False + return ( + value.startswith(("/", "./", "../", "~/", ".\\", "..\\", "~\\")) + or "/" in value + or "\\" in value + or bool(_WINDOWS_DRIVE_RE.match(value)) + ) + + +def require_remote_resource_source(source: str) -> str: + """Reject direct host-path resource ingestion over HTTP.""" + if not is_remote_resource_source(source): + raise PermissionDeniedError( + "HTTP server only accepts remote resource URLs or temp-uploaded files; " + "direct host filesystem paths are not allowed." + ) + return source + + +def deny_direct_local_skill_input(value: str) -> None: + """Reject obvious local filesystem paths for skill uploads over HTTP.""" + if looks_like_local_path(value): + raise PermissionDeniedError( + "HTTP server only accepts raw skill content or temp-uploaded files; " + "direct host filesystem paths are not allowed." 
+ ) + + +def resolve_uploaded_temp_file_id(temp_file_id: str, upload_temp_dir: Path) -> str: + """Resolve a temp upload id to a regular file under the server upload temp dir.""" + if not temp_file_id or temp_file_id in {".", ".."}: + raise PermissionDeniedError( + "HTTP server only accepts regular files from the upload temp directory." + ) + + raw_name = Path(temp_file_id) + if raw_name.name != temp_file_id or "/" in temp_file_id or "\\" in temp_file_id: + raise PermissionDeniedError( + "HTTP server only accepts temp_file_id values issued from the upload temp directory." + ) + + raw_path = upload_temp_dir / temp_file_id + if raw_path.is_symlink(): + raise PermissionDeniedError( + "HTTP server only accepts regular files from the upload temp directory." + ) + + try: + resolved_path = raw_path.resolve(strict=True) + except (FileNotFoundError, OSError) as exc: + raise PermissionDeniedError( + "HTTP server only accepts regular files from the upload temp directory." + ) from exc + + upload_root = upload_temp_dir.resolve() + try: + resolved_path.relative_to(upload_root) + except ValueError as exc: + raise PermissionDeniedError( + "HTTP server only accepts temp_file_id values issued from the upload temp directory." + ) from exc + + if not resolved_path.is_file(): + raise PermissionDeniedError( + "HTTP server only accepts regular files from the upload temp directory." 
+ ) + + return str(resolved_path) diff --git a/openviking/server/routers/pack.py b/openviking/server/routers/pack.py index 738bc7ea4d..86ad726b4a 100644 --- a/openviking/server/routers/pack.py +++ b/openviking/server/routers/pack.py @@ -2,15 +2,15 @@ # SPDX-License-Identifier: Apache-2.0 """Pack endpoints for OpenViking HTTP Server.""" -from typing import Optional - from fastapi import APIRouter, Depends -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from openviking.server.auth import get_request_context from openviking.server.dependencies import get_service from openviking.server.identity import RequestContext +from openviking.server.local_input_guard import resolve_uploaded_temp_file_id from openviking.server.models import Response +from openviking_cli.utils.config.open_viking_config import get_openviking_config router = APIRouter(prefix="/api/v1/pack", tags=["pack"]) @@ -23,10 +23,18 @@ class ExportRequest(BaseModel): class ImportRequest(BaseModel): - """Request model for import.""" + """Request model for import. + + Attributes: + temp_file_id: Temporary upload id returned by /api/v1/resources/temp_upload. + parent: Parent URI under which the imported pack will be placed. + force: Whether to overwrite existing content if needed. + vectorize: Whether to build vectors for imported content. 
+ """ + + model_config = ConfigDict(extra="forbid") - file_path: Optional[str] = None - temp_path: Optional[str] = None + temp_file_id: str parent: str force: bool = False vectorize: bool = True @@ -51,9 +59,8 @@ async def import_ovpack( """Import .ovpack file.""" service = get_service() - file_path = request.file_path - if request.temp_path: - file_path = request.temp_path + upload_temp_dir = get_openviking_config().storage.get_upload_temp_dir() + file_path = resolve_uploaded_temp_file_id(request.temp_file_id, upload_temp_dir) result = await service.pack.import_ovpack( file_path, diff --git a/openviking/server/routers/resources.py b/openviking/server/routers/resources.py index 94e70a056f..415776cab2 100644 --- a/openviking/server/routers/resources.py +++ b/openviking/server/routers/resources.py @@ -8,11 +8,15 @@ from typing import Any, Optional from fastapi import APIRouter, Depends, File, Form, UploadFile -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, ConfigDict, model_validator from openviking.server.auth import get_request_context from openviking.server.dependencies import get_service from openviking.server.identity import RequestContext +from openviking.server.local_input_guard import ( + require_remote_resource_source, + resolve_uploaded_temp_file_id, +) from openviking.server.models import Response from openviking.server.telemetry import run_operation from openviking.telemetry import TelemetryRequest @@ -26,8 +30,10 @@ class AddResourceRequest(BaseModel): """Request model for add_resource. Attributes: - path: Resource path (local file path or URL). Either path or temp_path must be provided. - temp_path: Temporary file path for uploaded files. Either path or temp_path must be provided. + path: Remote resource source such as an HTTP(S) URL or repository URL. + Either path or temp_file_id must be provided. + temp_file_id: Temporary upload id returned by /api/v1/resources/temp_upload. 
+ Either path or temp_file_id must be provided. to: Target URI for the resource (e.g., "viking://resources/my_resource"). If not specified, an auto-generated URI will be used. parent: Parent URI under which the resource will be stored. @@ -57,8 +63,10 @@ class AddResourceRequest(BaseModel): creating a new one. """ + model_config = ConfigDict(extra="forbid") + path: Optional[str] = None - temp_path: Optional[str] = None + temp_file_id: Optional[str] = None to: Optional[str] = None parent: Optional[str] = None reason: str = "" @@ -75,21 +83,37 @@ class AddResourceRequest(BaseModel): watch_interval: float = 0 @model_validator(mode="after") - def check_path_or_temp_path(self): - if not self.path and not self.temp_path: - raise ValueError("Either 'path' or 'temp_path' must be provided") + def check_path_or_temp_file_id(self): + if not self.path and not self.temp_file_id: + raise ValueError("Either 'path' or 'temp_file_id' must be provided") return self class AddSkillRequest(BaseModel): - """Request model for add_skill.""" + """Request model for add_skill. + + Attributes: + data: Inline skill content or structured skill data. HTTP requests do not treat + string values as host filesystem paths. + temp_file_id: Temporary upload id returned by /api/v1/resources/temp_upload. + wait: Whether to wait for skill processing to complete. + timeout: Timeout in seconds when wait=True. 
+ """ + + model_config = ConfigDict(extra="forbid") data: Any = None - temp_path: Optional[str] = None + temp_file_id: Optional[str] = None wait: bool = False timeout: Optional[float] = None telemetry: TelemetryRequest = False + @model_validator(mode="after") + def check_data_or_temp_file_id(self): + if self.data is None and not self.temp_file_id: + raise ValueError("Either 'data' or 'temp_file_id' must be provided") + return self + def _cleanup_temp_files(temp_dir: Path, max_age_hours: int = 1): """Clean up temporary files older than max_age_hours.""" @@ -129,7 +153,7 @@ async def _upload() -> dict[str, str]: with open(temp_file_path, "wb") as f: f.write(await file.read()) - return {"temp_path": str(temp_file_path)} + return {"temp_file_id": temp_filename} execution = await run_operation( operation="resources.temp_upload", @@ -153,11 +177,16 @@ async def add_resource( if request.to and request.parent: raise InvalidArgumentError("Cannot specify both 'to' and 'parent' at the same time.") + upload_temp_dir = get_openviking_config().storage.get_upload_temp_dir() path = request.path - if request.temp_path: - path = request.temp_path + allow_local_path_resolution = False + if request.temp_file_id: + path = resolve_uploaded_temp_file_id(request.temp_file_id, upload_temp_dir) + allow_local_path_resolution = True + elif path is not None: + path = require_remote_resource_source(path) if path is None: - raise InvalidArgumentError("Either 'path' or 'temp_path' must be provided.") + raise InvalidArgumentError("Either 'path' or 'temp_file_id' must be provided.") kwargs = { "strict": request.strict, @@ -182,6 +211,7 @@ async def add_resource( instruction=request.instruction, wait=request.wait, timeout=request.timeout, + allow_local_path_resolution=allow_local_path_resolution, **kwargs, ), ) @@ -199,9 +229,12 @@ async def add_skill( ): """Add skill to OpenViking.""" service = get_service() + upload_temp_dir = get_openviking_config().storage.get_upload_temp_dir() data = 
request.data - if request.temp_path: - data = request.temp_path + allow_local_path_resolution = False + if request.temp_file_id: + data = resolve_uploaded_temp_file_id(request.temp_file_id, upload_temp_dir) + allow_local_path_resolution = True execution = await run_operation( operation="resources.add_skill", @@ -211,6 +244,7 @@ async def add_skill( ctx=_ctx, wait=request.wait, timeout=request.timeout, + allow_local_path_resolution=allow_local_path_resolution, ), ) return Response( diff --git a/openviking/service/resource_service.py b/openviking/service/resource_service.py index 1206f70b94..4f0001d3eb 100644 --- a/openviking/service/resource_service.py +++ b/openviking/service/resource_service.py @@ -109,6 +109,7 @@ async def add_resource( summarize: bool = False, watch_interval: float = 0, skip_watch_management: bool = False, + allow_local_path_resolution: bool = True, **kwargs, ) -> Dict[str, Any]: """Add resource to OpenViking (only supports resources scope). @@ -188,6 +189,7 @@ async def add_resource( parent=parent, build_index=build_index, summarize=summarize, + allow_local_path_resolution=allow_local_path_resolution, **kwargs, ) @@ -377,6 +379,7 @@ async def add_skill( ctx: RequestContext, wait: bool = False, timeout: Optional[float] = None, + allow_local_path_resolution: bool = True, ) -> Dict[str, Any]: """Add skill to OpenViking. 
@@ -394,6 +397,7 @@ async def add_skill( data=data, viking_fs=self._viking_fs, ctx=ctx, + allow_local_path_resolution=allow_local_path_resolution, ) if wait: diff --git a/openviking/utils/media_processor.py b/openviking/utils/media_processor.py index c3e2c0672c..a9c7a0faab 100644 --- a/openviking/utils/media_processor.py +++ b/openviking/utils/media_processor.py @@ -9,7 +9,12 @@ from openviking.parse import DocumentConverter, parse from openviking.parse.base import ParseResult +from openviking.server.local_input_guard import ( + is_remote_resource_source, + looks_like_local_path, +) from openviking.utils.zip_safe import safe_extract_zip +from openviking_cli.exceptions import PermissionDeniedError from openviking_cli.utils.logger import get_logger if TYPE_CHECKING: @@ -47,6 +52,7 @@ async def process( self, source: str, instruction: str = "", + allow_local_path_resolution: bool = True, **kwargs, ) -> ParseResult: """Process any source (file/URL/content) with appropriate strategy.""" @@ -55,7 +61,9 @@ async def process( return await self._process_url(source, instruction) # Check if looks like a file path (short enough and no newlines) - is_potential_path = len(source) <= 1024 and "\n" not in source + is_potential_path = ( + allow_local_path_resolution and len(source) <= 1024 and "\n" not in source + ) if is_potential_path: path = Path(source) if path.exists(): @@ -66,12 +74,18 @@ async def process( logger.warning(f"Path {path} does not exist") raise FileNotFoundError(f"Path {path} does not exist") + if not allow_local_path_resolution and looks_like_local_path(source): + raise PermissionDeniedError( + "HTTP server only accepts remote resource URLs or temp-uploaded files; " + "direct host filesystem paths are not allowed." 
+ ) + # Treat as raw content return await parse(source, instruction=instruction) def _is_url(self, source: str) -> bool: """Check if source is a URL.""" - return source.startswith(("http://", "https://", "git@", "ssh://", "git://")) + return is_remote_resource_source(source) async def _process_url(self, url: str, instruction: str, **kwargs) -> ParseResult: """Process URL source.""" @@ -115,8 +129,7 @@ def _is_feishu_url(url: str) -> bool: path = parsed.path is_feishu_domain = host.endswith(".feishu.cn") or host.endswith(".larksuite.com") has_doc_path = any( - path == f"/{t}" or path.startswith(f"/{t}/") - for t in ("docx", "wiki", "sheets", "base") + path == f"/{t}" or path.startswith(f"/{t}/") for t in ("docx", "wiki", "sheets", "base") ) return is_feishu_domain and has_doc_path diff --git a/openviking/utils/skill_processor.py b/openviking/utils/skill_processor.py index 61e3507479..dedbcf4c11 100644 --- a/openviking/utils/skill_processor.py +++ b/openviking/utils/skill_processor.py @@ -16,11 +16,12 @@ from openviking.core.mcp_converter import is_mcp_format, mcp_to_skill from openviking.core.skill_loader import SkillLoader from openviking.server.identity import RequestContext +from openviking.server.local_input_guard import deny_direct_local_skill_input from openviking.storage import VikingDBManager from openviking.storage.queuefs.embedding_msg_converter import EmbeddingMsgConverter from openviking.storage.viking_fs import VikingFS -from openviking.utils.zip_safe import safe_extract_zip from openviking.telemetry import get_current_telemetry +from openviking.utils.zip_safe import safe_extract_zip from openviking_cli.utils import get_logger from openviking_cli.utils.config import get_openviking_config @@ -48,6 +49,7 @@ async def process_skill( data: Any, viking_fs: VikingFS, ctx: RequestContext, + allow_local_path_resolution: bool = True, ) -> Dict[str, Any]: """ Process and store a skill. 
@@ -68,7 +70,10 @@ async def process_skill( telemetry = get_current_telemetry() parse_start = time.perf_counter() - skill_dict, auxiliary_files, base_path = self._parse_skill(data) + skill_dict, auxiliary_files, base_path = self._parse_skill( + data, + allow_local_path_resolution=allow_local_path_resolution, + ) telemetry.set( "skill.parse.duration_ms", round((time.perf_counter() - parse_start) * 1000, 3) ) @@ -136,7 +141,11 @@ async def process_skill( "auxiliary_files": len(auxiliary_files), } - def _parse_skill(self, data: Any) -> tuple[Dict[str, Any], List[Path], Optional[Path]]: + def _parse_skill( + self, + data: Any, + allow_local_path_resolution: bool = True, + ) -> tuple[Dict[str, Any], List[Path], Optional[Path]]: """Parse skill data from various formats.""" if data is None: raise ValueError("Skill data cannot be None") @@ -145,15 +154,18 @@ def _parse_skill(self, data: Any) -> tuple[Dict[str, Any], List[Path], Optional[ base_path = None if isinstance(data, str): - path_obj = Path(data) - if path_obj.exists(): - if zipfile.is_zipfile(path_obj): - temp_dir = Path(tempfile.mkdtemp()) - with zipfile.ZipFile(path_obj, "r") as zipf: - safe_extract_zip(zipf, temp_dir) - data = temp_dir - else: - data = path_obj + if allow_local_path_resolution: + path_obj = Path(data) + if path_obj.exists(): + if zipfile.is_zipfile(path_obj): + temp_dir = Path(tempfile.mkdtemp()) + with zipfile.ZipFile(path_obj, "r") as zipf: + safe_extract_zip(zipf, temp_dir) + data = temp_dir + else: + data = path_obj + else: + deny_direct_local_skill_input(data) if isinstance(data, Path): if data.is_dir(): diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index f7dffc9ecd..fe81776cb3 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -301,7 +301,7 @@ def _zip_directory(self, dir_path: str) -> str: return str(zip_path) async def _upload_temp_file(self, file_path: str) -> str: - """Upload a file to /api/v1/resources/temp_upload and return 
the temp_path.""" + """Upload a file to /api/v1/resources/temp_upload and return the temp_file_id.""" with open(file_path, "rb") as f: files = {"file": (Path(file_path).name, f, "application/octet-stream")} response = await self._http.post( @@ -309,7 +309,7 @@ async def _upload_temp_file(self, file_path: str) -> str: files=files, ) result = self._handle_response(response) - return result.get("temp_path", "") + return result.get("temp_file_id", "") # ============= Resource Management ============= @@ -357,13 +357,13 @@ async def add_resource( if path_obj.is_dir(): zip_path = self._zip_directory(path) try: - temp_path = await self._upload_temp_file(zip_path) - request_data["temp_path"] = temp_path + temp_file_id = await self._upload_temp_file(zip_path) + request_data["temp_file_id"] = temp_file_id finally: Path(zip_path).unlink(missing_ok=True) elif path_obj.is_file(): - temp_path = await self._upload_temp_file(path) - request_data["temp_path"] = temp_path + temp_file_id = await self._upload_temp_file(path) + request_data["temp_file_id"] = temp_file_id else: request_data["path"] = path else: @@ -396,13 +396,13 @@ async def add_skill( if path_obj.is_dir(): zip_path = self._zip_directory(data) try: - temp_path = await self._upload_temp_file(zip_path) - request_data["temp_path"] = temp_path + temp_file_id = await self._upload_temp_file(zip_path) + request_data["temp_file_id"] = temp_file_id finally: Path(zip_path).unlink(missing_ok=True) elif path_obj.is_file(): - temp_path = await self._upload_temp_file(data) - request_data["temp_path"] = temp_path + temp_file_id = await self._upload_temp_file(data) + request_data["temp_file_id"] = temp_file_id else: request_data["data"] = data else: @@ -793,11 +793,13 @@ async def import_ovpack( } file_path_obj = Path(file_path) - if file_path_obj.exists() and file_path_obj.is_file(): - temp_path = await self._upload_temp_file(file_path) - request_data["temp_path"] = temp_path - else: - request_data["file_path"] = file_path + if not 
file_path_obj.exists(): + raise FileNotFoundError(f"Local ovpack file not found: {file_path}") + if not file_path_obj.is_file(): + raise ValueError(f"Path {file_path} is not a file") + + temp_file_id = await self._upload_temp_file(file_path) + request_data["temp_file_id"] = temp_file_id response = await self._http.post( "/api/v1/pack/import", diff --git a/tests/api_test/api/client.py b/tests/api_test/api/client.py index 2562cd2bdc..509ac71283 100644 --- a/tests/api_test/api/client.py +++ b/tests/api_test/api/client.py @@ -1,5 +1,9 @@ import json +import os +import tempfile import time +import zipfile +from pathlib import Path from typing import Any, Dict, Optional from urllib.parse import urlencode @@ -113,6 +117,39 @@ def _build_url(self, base: str, endpoint: str, params: Optional[Dict[str, Any]] url = f"{url}?{urlencode(params)}" return url + def _upload_temp_file(self, file_path: str) -> str: + endpoint = "/api/v1/resources/temp_upload" + url = self._build_url(self.server_url, endpoint) + original_content_type = self.session.headers.pop("Content-Type", None) + try: + with open(file_path, "rb") as file_obj: + response = self._request_with_retry( + "POST", + url, + files={"file": (Path(file_path).name, file_obj)}, + ) + finally: + if original_content_type is not None: + self.session.headers["Content-Type"] = original_content_type + + response.raise_for_status() + data = response.json() + return data["result"]["temp_file_id"] + + def _zip_directory_for_upload(self, directory_path: str) -> str: + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_file: + temp_zip_path = tmp_file.name + try: + with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + base_path = Path(directory_path) + for file_path in base_path.rglob("*"): + if file_path.is_file(): + zf.write(file_path, arcname=file_path.relative_to(base_path)) + return temp_zip_path + except Exception: + Path(temp_zip_path).unlink(missing_ok=True) + raise + def find( self, query: str, 
@@ -207,14 +244,26 @@ def add_resource( ) -> requests.Response: endpoint = "/api/v1/resources" url = self._build_url(self.server_url, endpoint) - payload = {"path": path} + payload = {} + cleanup_path = None + if os.path.isfile(path): + payload["temp_file_id"] = self._upload_temp_file(path) + elif os.path.isdir(path): + cleanup_path = self._zip_directory_for_upload(path) + payload["temp_file_id"] = self._upload_temp_file(cleanup_path) + else: + payload["path"] = path if to: payload["to"] = to if reason: payload["reason"] = reason if wait: payload["wait"] = wait - return self._request_with_retry("POST", url, json=payload) + try: + return self._request_with_retry("POST", url, json=payload) + finally: + if cleanup_path is not None: + Path(cleanup_path).unlink(missing_ok=True) def wait_processed(self) -> requests.Response: endpoint = "/api/v1/system/wait" @@ -311,9 +360,14 @@ def import_ovpack( ) -> requests.Response: endpoint = "/api/v1/pack/import" url = self._build_url(self.server_url, endpoint) - return self.session.post( + if not os.path.isfile(file_path): + raise FileNotFoundError(f"Local ovpack file not found: {file_path}") + payload = {"parent": parent, "force": force, "vectorize": vectorize} + payload["temp_file_id"] = self._upload_temp_file(file_path) + return self._request_with_retry( + "POST", url, - json={"file_path": file_path, "parent": parent, "force": force, "vectorize": vectorize}, + json=payload, ) def fs_mv(self, from_uri: str, to_uri: str) -> requests.Response: diff --git a/tests/client/test_http_client_local_upload.py b/tests/client/test_http_client_local_upload.py index 27073a68a5..4d2f771eac 100644 --- a/tests/client/test_http_client_local_upload.py +++ b/tests/client/test_http_client_local_upload.py @@ -27,7 +27,7 @@ async def test_add_skill_uploads_local_file_even_when_url_is_localhost(tmp_path) client._http = fake_http async def fake_upload(_path: str) -> str: - return "/tmp/uploaded-skill.md" + return "upload_skill.md" 
client._upload_temp_file = fake_upload client._handle_response_data = lambda _response: {"result": {"status": "ok"}} @@ -36,7 +36,7 @@ async def fake_upload(_path: str) -> str: call = fake_http.calls[-1] assert call["path"] == "/api/v1/skills" - assert call["json"]["temp_path"] == "/tmp/uploaded-skill.md" + assert call["json"]["temp_file_id"] == "upload_skill.md" assert "data" not in call["json"] @@ -50,7 +50,7 @@ async def test_add_resource_uploads_local_file_even_when_url_is_localhost(tmp_pa client._http = fake_http async def fake_upload(_path: str) -> str: - return "/tmp/uploaded-resource.md" + return "upload_resource.md" client._upload_temp_file = fake_upload client._handle_response_data = lambda _response: { @@ -61,7 +61,7 @@ async def fake_upload(_path: str) -> str: call = fake_http.calls[-1] assert call["path"] == "/api/v1/resources" - assert call["json"]["temp_path"] == "/tmp/uploaded-resource.md" + assert call["json"]["temp_file_id"] == "upload_resource.md" assert "path" not in call["json"] @@ -75,7 +75,7 @@ async def test_import_ovpack_uploads_local_file_even_when_url_is_localhost(tmp_p client._http = fake_http async def fake_upload(_path: str) -> str: - return "/tmp/uploaded-pack.ovpack" + return "upload_pack.ovpack" client._upload_temp_file = fake_upload client._handle_response = lambda _response: {"uri": "viking://resources/imported"} @@ -84,5 +84,34 @@ async def fake_upload(_path: str) -> str: call = fake_http.calls[-1] assert call["path"] == "/api/v1/pack/import" - assert call["json"]["temp_path"] == "/tmp/uploaded-pack.ovpack" + assert call["json"]["temp_file_id"] == "upload_pack.ovpack" assert "file_path" not in call["json"] + + +@pytest.mark.asyncio +async def test_import_ovpack_fails_fast_when_local_file_is_missing(tmp_path): + client = AsyncHTTPClient(url="http://localhost:1933") + fake_http = _FakeHTTPClient() + client._http = fake_http + + missing_path = tmp_path / "missing.ovpack" + + with pytest.raises(FileNotFoundError, match="Local ovpack 
file not found"): + await client.import_ovpack(str(missing_path), parent="viking://resources/") + + assert fake_http.calls == [] + + +@pytest.mark.asyncio +async def test_import_ovpack_fails_fast_when_path_is_directory(tmp_path): + client = AsyncHTTPClient(url="http://localhost:1933") + fake_http = _FakeHTTPClient() + client._http = fake_http + + pack_dir = tmp_path / "pack_dir" + pack_dir.mkdir() + + with pytest.raises(ValueError, match="is not a file"): + await client.import_ovpack(str(pack_dir), parent="viking://resources/") + + assert fake_http.calls == [] diff --git a/tests/server/conftest.py b/tests/server/conftest.py index 3bc0e40fd7..6b3fce7335 100644 --- a/tests/server/conftest.py +++ b/tests/server/conftest.py @@ -8,6 +8,7 @@ import threading import time from pathlib import Path +from types import SimpleNamespace import httpx import pytest @@ -107,6 +108,23 @@ def sample_markdown_file(temp_dir: Path) -> Path: return f +@pytest.fixture(scope="function") +def upload_temp_dir(temp_dir: Path, monkeypatch) -> Path: + """Use the per-test temp directory as the HTTP upload temp dir.""" + config = SimpleNamespace( + storage=SimpleNamespace(get_upload_temp_dir=lambda: temp_dir), + ) + monkeypatch.setattr( + "openviking.server.routers.resources.get_openviking_config", + lambda: config, + ) + monkeypatch.setattr( + "openviking.server.routers.pack.get_openviking_config", + lambda: config, + ) + return temp_dir + + @pytest_asyncio.fixture(scope="function") async def service(temp_dir: Path, monkeypatch): """Create and initialize an OpenVikingService in embedded mode.""" diff --git a/tests/server/test_api_local_input_security.py b/tests/server/test_api_local_input_security.py new file mode 100644 index 0000000000..910638c6e3 --- /dev/null +++ b/tests/server/test_api_local_input_security.py @@ -0,0 +1,154 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Security tests for HTTP server local input handling.""" + +import io +import zipfile + +import httpx + + +async def test_add_skill_accepts_temp_uploaded_file( + client: httpx.AsyncClient, + upload_temp_dir, +): + skill_file = upload_temp_dir / "skill.md" + skill_file.write_text( + """--- +name: uploaded-skill +description: temp uploaded skill +--- + +# Uploaded Skill +""" + ) + + resp = await client.post( + "/api/v1/skills", + json={"temp_file_id": skill_file.name, "wait": True}, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + assert body["result"]["uri"].startswith("viking://agent/skills/") + + +async def test_add_skill_rejects_direct_local_path(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/skills", + json={"data": "/app/ov.conf"}, + ) + assert resp.status_code == 403 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" + + +async def test_add_skill_rejects_legacy_temp_path_field(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/skills", + json={"temp_path": "upload_skill.md"}, + ) + assert resp.status_code == 422 + + +async def test_add_skill_accepts_raw_skill_content(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/skills", + json={ + "data": """--- +name: inline-skill +description: inline +--- + +# Inline Skill +""" + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + assert body["result"]["uri"].startswith("viking://agent/skills/") + + +def _build_ovpack_bytes() -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, "w") as zf: + zf.writestr("pkg/_._meta.json", '{"uri": "viking://resources/pkg"}') + zf.writestr("pkg/content.md", "# Demo\n") + return buffer.getvalue() + + +async def test_import_ovpack_accepts_temp_uploaded_file( + client: httpx.AsyncClient, + upload_temp_dir, +): + ovpack_file = 
upload_temp_dir / "demo.ovpack" + ovpack_file.write_bytes(_build_ovpack_bytes()) + + resp = await client.post( + "/api/v1/pack/import", + json={ + "temp_file_id": ovpack_file.name, + "parent": "viking://resources/imported", + "vectorize": False, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + assert body["result"]["uri"].startswith("viking://resources/imported/") + + +async def test_import_ovpack_rejects_direct_file_path_field(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/pack/import", + json={ + "file_path": "/tmp/demo.ovpack", + "parent": "viking://resources/imported", + "vectorize": False, + }, + ) + assert resp.status_code == 422 + + +async def test_import_ovpack_rejects_legacy_temp_path_field(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/pack/import", + json={ + "temp_path": "upload_pack.ovpack", + "parent": "viking://resources/imported", + "vectorize": False, + }, + ) + assert resp.status_code == 422 + + +async def test_import_ovpack_rejects_forged_temp_file_id( + client: httpx.AsyncClient, + upload_temp_dir, +): + outside_file = upload_temp_dir.parent / "outside.ovpack" + outside_file.write_bytes(_build_ovpack_bytes()) + + resp = await client.post( + "/api/v1/pack/import", + json={ + "temp_file_id": "../outside.ovpack", + "parent": "viking://resources/imported", + "vectorize": False, + }, + ) + assert resp.status_code == 403 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" + + +async def test_add_resource_rejects_legacy_temp_path_field(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/resources", + json={"temp_path": "upload_resource.md", "reason": "legacy field"}, + ) + assert resp.status_code == 422 diff --git a/tests/server/test_api_relations.py b/tests/server/test_api_relations.py index cad2efdaf5..d9942e2770 100644 --- a/tests/server/test_api_relations.py +++ 
b/tests/server/test_api_relations.py @@ -13,16 +13,16 @@ async def test_get_relations_empty(client_with_resource): assert isinstance(body["result"], list) -async def test_link_and_get_relations(client_with_resource): +async def test_link_and_get_relations(client_with_resource, upload_temp_dir): client, uri = client_with_resource # Create a second resource to link to - from tests.server.conftest import SAMPLE_MD_CONTENT, TEST_TMP_DIR + from tests.server.conftest import SAMPLE_MD_CONTENT - f2 = TEST_TMP_DIR / "link_target.md" + f2 = upload_temp_dir / "link_target.md" f2.write_text(SAMPLE_MD_CONTENT) add_resp = await client.post( "/api/v1/resources", - json={"path": str(f2), "reason": "link target", "wait": True}, + json={"temp_file_id": f2.name, "reason": "link target", "wait": True}, ) target_uri = add_resp.json()["result"]["root_uri"] @@ -46,15 +46,15 @@ async def test_link_and_get_relations(client_with_resource): assert len(body["result"]) > 0 -async def test_unlink(client_with_resource): +async def test_unlink(client_with_resource, upload_temp_dir): client, uri = client_with_resource - from tests.server.conftest import SAMPLE_MD_CONTENT, TEST_TMP_DIR + from tests.server.conftest import SAMPLE_MD_CONTENT - f2 = TEST_TMP_DIR / "unlink_target.md" + f2 = upload_temp_dir / "unlink_target.md" f2.write_text(SAMPLE_MD_CONTENT) add_resp = await client.post( "/api/v1/resources", - json={"path": str(f2), "reason": "unlink target", "wait": True}, + json={"temp_file_id": f2.name, "reason": "unlink target", "wait": True}, ) target_uri = add_resp.json()["result"]["root_uri"] @@ -72,17 +72,17 @@ async def test_unlink(client_with_resource): assert resp.json()["status"] == "ok" -async def test_link_multiple_targets(client_with_resource): +async def test_link_multiple_targets(client_with_resource, upload_temp_dir): client, uri = client_with_resource - from tests.server.conftest import SAMPLE_MD_CONTENT, TEST_TMP_DIR + from tests.server.conftest import SAMPLE_MD_CONTENT targets = [] 
for i in range(2): - f = TEST_TMP_DIR / f"multi_target_{i}.md" + f = upload_temp_dir / f"multi_target_{i}.md" f.write_text(SAMPLE_MD_CONTENT) add_resp = await client.post( "/api/v1/resources", - json={"path": str(f), "reason": "multi", "wait": True}, + json={"temp_file_id": f.name, "reason": "multi", "wait": True}, ) targets.append(add_resp.json()["result"]["root_uri"]) diff --git a/tests/server/test_api_resources.py b/tests/server/test_api_resources.py index 2159ac6ac6..c320e0f5a8 100644 --- a/tests/server/test_api_resources.py +++ b/tests/server/test_api_resources.py @@ -3,18 +3,20 @@ """Tests for resource management endpoints.""" -from types import SimpleNamespace - import httpx from openviking.telemetry import get_current_telemetry -async def test_add_resource_success(client: httpx.AsyncClient, sample_markdown_file): +async def test_add_resource_success( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "test resource", "wait": False, }, @@ -29,11 +31,15 @@ async def test_add_resource_success(client: httpx.AsyncClient, sample_markdown_f assert body["result"]["root_uri"].startswith("viking://") -async def test_add_resource_with_wait(client: httpx.AsyncClient, sample_markdown_file): +async def test_add_resource_with_wait( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "test resource", "wait": True, }, @@ -44,11 +50,15 @@ async def test_add_resource_with_wait(client: httpx.AsyncClient, sample_markdown assert "root_uri" in body["result"] -async def test_add_resource_with_telemetry_wait(client: httpx.AsyncClient, sample_markdown_file): +async def test_add_resource_with_telemetry_wait( + client: httpx.AsyncClient, + 
sample_markdown_file, + upload_temp_dir, +): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "telemetry resource", "wait": True, "telemetry": True, @@ -63,14 +73,17 @@ async def test_add_resource_with_telemetry_wait(client: httpx.AsyncClient, sampl semantic = telemetry_summary.get("semantic_nodes") if semantic is not None: assert semantic["total"] is None or semantic["done"] == semantic["total"] - assert semantic["pending"] in (None, 0) - assert semantic["running"] in (None, 0) + assert semantic.get("pending") in (None, 0) + assert semantic.get("running") in (None, 0) assert "resource" in telemetry_summary assert "memory" not in telemetry_summary async def test_add_resource_with_telemetry_includes_resource_breakdown( - client: httpx.AsyncClient, service, monkeypatch + client: httpx.AsyncClient, + service, + monkeypatch, + upload_temp_dir, ): async def fake_add_resource(**kwargs): telemetry = get_current_telemetry() @@ -93,10 +106,13 @@ async def fake_add_resource(**kwargs): monkeypatch.setattr(service.resources, "add_resource", fake_add_resource) + demo_file = upload_temp_dir / "demo.md" + demo_file.write_text("# demo\n") + resp = await client.post( "/api/v1/resources", json={ - "path": "/tmp/demo.md", + "temp_file_id": demo_file.name, "reason": "telemetry resource", "wait": True, "telemetry": True, @@ -117,12 +133,14 @@ async def fake_add_resource(**kwargs): async def test_add_resource_with_summary_only_telemetry( - client: httpx.AsyncClient, sample_markdown_file + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, ): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "summary only telemetry resource", "wait": True, "telemetry": {"summary": True}, @@ -139,12 +157,14 @@ async def test_add_resource_with_summary_only_telemetry( async def 
test_add_resource_rejects_events_only_telemetry( - client: httpx.AsyncClient, sample_markdown_file + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, ): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "events only telemetry", "wait": False, "telemetry": {"summary": False, "events": True}, @@ -162,17 +182,21 @@ async def test_add_resource_file_not_found(client: httpx.AsyncClient): "/api/v1/resources", json={"path": "/nonexistent/file.txt", "reason": "test"}, ) - assert resp.status_code == 200 + assert resp.status_code == 403 body = resp.json() - assert body["status"] == "ok" - assert "errors" in body["result"] and len(body["result"]["errors"]) > 0 + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" -async def test_add_resource_with_to(client: httpx.AsyncClient, sample_markdown_file): +async def test_add_resource_with_to( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "to": "viking://resources/custom/sample", "reason": "test resource", }, @@ -193,10 +217,14 @@ async def test_wait_processed_empty_queue(client: httpx.AsyncClient): assert body["status"] == "ok" -async def test_wait_processed_after_add(client: httpx.AsyncClient, sample_markdown_file): +async def test_wait_processed_after_add( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): await client.post( "/api/v1/resources", - json={"path": str(sample_markdown_file), "reason": "test"}, + json={"temp_file_id": sample_markdown_file.name, "reason": "test"}, ) resp = await client.post( "/api/v1/system/wait", @@ -207,12 +235,14 @@ async def test_wait_processed_after_add(client: httpx.AsyncClient, sample_markdo async def test_add_resource_with_watch_interval_requires_to( - client: 
httpx.AsyncClient, sample_markdown_file + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, ): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "test resource with watch interval", "watch_interval": 5.0, }, @@ -224,12 +254,14 @@ async def test_add_resource_with_watch_interval_requires_to( async def test_add_resource_with_default_watch_interval( - client: httpx.AsyncClient, sample_markdown_file + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, ): resp = await client.post( "/api/v1/resources", json={ - "path": str(sample_markdown_file), + "temp_file_id": sample_markdown_file.name, "reason": "test resource with default watch interval", }, ) @@ -239,14 +271,7 @@ async def test_add_resource_with_default_watch_interval( assert "root_uri" in body["result"] -async def test_temp_upload_success(client: httpx.AsyncClient, temp_dir, monkeypatch): - monkeypatch.setattr( - "openviking.server.routers.resources.get_openviking_config", - lambda: SimpleNamespace( - storage=SimpleNamespace(get_upload_temp_dir=lambda: temp_dir), - ), - ) - +async def test_temp_upload_success(client: httpx.AsyncClient, upload_temp_dir): resp = await client.post( "/api/v1/resources/temp_upload", files={"file": ("sample.md", b"# upload\n", "text/markdown")}, @@ -255,19 +280,14 @@ async def test_temp_upload_success(client: httpx.AsyncClient, temp_dir, monkeypa body = resp.json() assert body["status"] == "ok" assert "telemetry" not in body - assert body["result"]["temp_path"].endswith(".md") + assert body["result"]["temp_file_id"].endswith(".md") + assert "/" not in body["result"]["temp_file_id"] async def test_temp_upload_with_telemetry_returns_summary( - client: httpx.AsyncClient, temp_dir, monkeypatch + client: httpx.AsyncClient, + upload_temp_dir, ): - monkeypatch.setattr( - "openviking.server.routers.resources.get_openviking_config", - lambda: SimpleNamespace( - 
storage=SimpleNamespace(get_upload_temp_dir=lambda: temp_dir), - ), - ) - resp = await client.post( "/api/v1/resources/temp_upload", files={"file": ("sample.md", b"# upload\n", "text/markdown")}, @@ -276,5 +296,74 @@ async def test_temp_upload_with_telemetry_returns_summary( assert resp.status_code == 200 body = resp.json() assert body["status"] == "ok" - assert body["result"]["temp_path"].endswith(".md") + assert body["result"]["temp_file_id"].endswith(".md") + assert "/" not in body["result"]["temp_file_id"] assert body["telemetry"]["summary"]["operation"] == "resources.temp_upload" + + +async def test_add_resource_rejects_direct_local_path(client: httpx.AsyncClient): + resp = await client.post( + "/api/v1/resources", + json={"path": "/app/ov.conf", "reason": "security test"}, + ) + assert resp.status_code == 403 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" + + +async def test_add_resource_accepts_temp_uploaded_file( + client: httpx.AsyncClient, + upload_temp_dir, +): + upload_resp = await client.post( + "/api/v1/resources/temp_upload", + files={"file": ("sample.md", b"# upload\n", "text/markdown")}, + ) + temp_file_id = upload_resp.json()["result"]["temp_file_id"] + + resp = await client.post( + "/api/v1/resources", + json={"temp_file_id": temp_file_id, "reason": "uploaded resource"}, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + assert body["result"]["root_uri"].startswith("viking://") + + +async def test_add_resource_rejects_temp_file_id_directory( + client: httpx.AsyncClient, + upload_temp_dir, +): + temp_subdir = upload_temp_dir / "dir_upload" + temp_subdir.mkdir() + + resp = await client.post( + "/api/v1/resources", + json={"temp_file_id": temp_subdir.name, "reason": "dir upload"}, + ) + assert resp.status_code == 403 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" + + +async def 
test_add_resource_rejects_temp_file_id_symlink( + client: httpx.AsyncClient, + upload_temp_dir, + tmp_path, +): + real_file = tmp_path / "outside.md" + real_file.write_text("# outside\n") + symlink_path = upload_temp_dir / "linked.md" + symlink_path.symlink_to(real_file) + + resp = await client.post( + "/api/v1/resources", + json={"temp_file_id": symlink_path.name, "reason": "symlink upload"}, + ) + assert resp.status_code == 403 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" diff --git a/tests/server/test_error_scenarios.py b/tests/server/test_error_scenarios.py index 13119d9cfb..2e283469c3 100644 --- a/tests/server/test_error_scenarios.py +++ b/tests/server/test_error_scenarios.py @@ -43,16 +43,16 @@ async def test_not_found_resource_returns_structured_error( async def test_add_resource_file_not_found(client: httpx.AsyncClient): """Adding a resource with non-existent file path. - The service accepts the request (queues it) and returns 200. - The actual error surfaces during processing. + HTTP server no longer accepts direct host filesystem paths. 
""" resp = await client.post( "/api/v1/resources", json={"path": "/tmp/nonexistent_file_xyz_12345.md", "reason": "test"}, ) body = resp.json() - # Service queues the request and returns ok - assert resp.status_code == 200 or body["status"] == "error" + assert resp.status_code == 403 + assert body["status"] == "error" + assert body["error"]["code"] == "PERMISSION_DENIED" async def test_empty_body_on_post(client: httpx.AsyncClient): diff --git a/tests/server/test_http_client_sdk.py b/tests/server/test_http_client_sdk.py index 78a5f6626e..dd6af738de 100644 --- a/tests/server/test_http_client_sdk.py +++ b/tests/server/test_http_client_sdk.py @@ -3,6 +3,9 @@ """SDK tests using AsyncHTTPClient against a real uvicorn server.""" +import io +import zipfile + import pytest_asyncio from openviking_cli.client.http import AsyncHTTPClient @@ -68,6 +71,29 @@ async def test_sdk_add_skill_from_local_file(http_client): assert result["uri"].startswith("viking://agent/skills/") +def _build_ovpack_bytes() -> bytes: + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, "w") as zf: + zf.writestr("pkg/_._meta.json", '{"uri": "viking://resources/pkg"}') + zf.writestr("pkg/content.md", "# Demo\n") + return buffer.getvalue() + + +async def test_sdk_import_ovpack_from_local_file(http_client): + client, _ = http_client + f = TEST_TMP_DIR / "sdk_import.ovpack" + f.parent.mkdir(parents=True, exist_ok=True) + f.write_bytes(_build_ovpack_bytes()) + + uri = await client.import_ovpack( + str(f), + parent="viking://resources/imported/", + force=True, + vectorize=False, + ) + assert uri.startswith("viking://resources/imported/") + + async def test_sdk_wait_processed(http_client): client, _ = http_client result = await client.wait_processed()