feat(cli): ask/chat/batch 增加 --format json 结构化输出

Test User · Test User · commit 9914a5f8d083 · 2026-05-06T09:16:33.000+08:00
- ask --format json: 输出 {content, thinking, usage, model, elapsed_ms}
- chat --format json: 仅单条模式支持, 多轮会显式报错
- batch --format json: stdout 输出聚合汇总
  {input_file, output_file, record_count, success_count, failure_count,
   format_type, model, summary}
- 顶层 help 追加标准化 Exit Codes 表和 agent-friendly 约定说明
- 三命令均做 --format 取值校验, 错误提示给出修复建议

配合上一 commit 的 stderr 进度分离, agent/脚本可直接 stdout 解析 JSON。
diff --git a/flexllm/cli/__init__.py b/flexllm/cli/__init__.py
@@ -23,7 +23,25 @@
   flexllm test                            # 测试 LLM 连接
 
 配置: ~/.flexllm/config.yaml (运行 flexllm init 创建)
-环境变量: FLEXLLM_BASE_URL, FLEXLLM_API_KEY, FLEXLLM_MODEL""",
+环境变量: FLEXLLM_BASE_URL, FLEXLLM_API_KEY, FLEXLLM_MODEL
+
+\b
+Exit Codes (Agent-friendly, cross-version stable):
+  0   成功
+  1   通用错误
+  2   参数/用法错误（非法值、缺少必选）
+  3   资源未找到（模型/配置/文件）
+  4   认证失败（API Key 无效、额度不足）
+  5   冲突（资源已存在）
+  6   网络错误（常为 retryable）
+  7   依赖缺失（缺 pip 包）
+  8   文件 IO 错误
+  10  Dry-run 成功（非实际执行）
+
+\b
+Agent-friendly JSON 输出:
+  核心命令支持 --format json（ask/chat/batch），stdout 为结构化数据，
+  stderr 为进度/日志；错误在非 TTY 自动以 JSON 输出到 stderr。""",
         add_completion=True,
         no_args_is_help=True,
     )
diff --git a/flexllm/cli/chat_helpers.py b/flexllm/cli/chat_helpers.py
@@ -22,8 +22,11 @@ def single_chat(
     user_template=None,
     thinking=None,
     extract=False,
+    output_format="text",
 ):
     """单次对话"""
+    import json
+    import time
 
     async def _run():
         from flexllm import LLMClient
@@ -39,6 +42,26 @@ async def _run():
             if thinking is not None:
                 kwargs["thinking"] = thinking
 
+            if output_format == "json":
+                t0 = time.perf_counter()
+                result = await client.chat_completions(messages, **kwargs)
+                elapsed_ms = int((time.perf_counter() - t0) * 1000)
+                output = str(result) if not isinstance(result, str) else result
+                thinking_text = None
+                usage = None
+                if hasattr(result, "data") and isinstance(result.data, dict):
+                    thinking_text = result.data.get("thinking")
+                    usage = result.data.get("usage")
+                payload = {
+                    "content": output,
+                    "thinking": thinking_text,
+                    "usage": usage,
+                    "model": model,
+                    "elapsed_ms": elapsed_ms,
+                }
+                print(json.dumps(payload, ensure_ascii=False))
+                return
+
             if stream and not extract:
                 print("Assistant: ", end="", flush=True)
                 async for chunk in client.chat_completions_stream(messages, **kwargs):
diff --git a/flexllm/cli/commands.py b/flexllm/cli/commands.py
@@ -57,6 +57,13 @@ def ask(
         files: Annotated[
             list[str] | None, Option("-f", "--file", help="附加文件内容到 prompt（可多次指定）")
         ] = None,
+        format: Annotated[
+            str,
+            Option(
+                "--format",
+                help="输出格式: text(默认) 或 json(结构化: {content, thinking, usage, model, elapsed_ms})",
+            ),
+        ] = "text",
         dry_run: Annotated[bool, Option("--dry-run", help="预览操作内容，不实际执行")] = False,
     ):
         """LLM 快速问答（支持管道输入）
@@ -79,9 +86,21 @@ def ask(
           flexllm ask "用 Python 写个快排" -x
           flexllm ask "用 Python 写个快排" -x > sort.py
 
+        JSON 输出 (--format json):  给 agent/脚本解析
+          flexllm ask "你好" --format json
+          # {"content":"...","thinking":null,"usage":{...},"model":"...","elapsed_ms":123}
+
         预览:
           flexllm ask "测试" --dry-run              # 预览请求内容
         """
+        if format not in ("text", "json"):
+            cli_error(
+                ErrorType.INVALID_ARGS,
+                "--format 参数值无效",
+                context={"arg": "--format", "received": format, "expected": ["text", "json"]},
+                suggestion="使用 --format text 或 --format json",
+                doc="flexllm ask --help",
+            )
         stdin_content = None
         if not sys.stdin.isatty():
             stdin_content = sys.stdin.read().strip()
@@ -147,8 +166,12 @@ async def _ask():
             async with LLMClient(model=model_id, base_url=base_url, api_key=api_key) as client:
                 return await client.chat_completions(messages, **model_params)
 
+        import time
+
         try:
+            t0 = time.perf_counter()
             result = asyncio.run(_ask())
+            elapsed_ms = int((time.perf_counter() - t0) * 1000)
             if result is None:
                 cli_error(
                     ErrorType.GENERAL,
@@ -173,6 +196,23 @@ async def _ask():
                     retryable=True,
                 )
             output = str(result) if not isinstance(result, str) else result
+
+            if format == "json":
+                thinking_text = None
+                usage = None
+                if hasattr(result, "data") and isinstance(result.data, dict):
+                    thinking_text = result.data.get("thinking")
+                    usage = result.data.get("usage")
+                payload = {
+                    "content": output,
+                    "thinking": thinking_text,
+                    "usage": usage,
+                    "model": model_id,
+                    "elapsed_ms": elapsed_ms,
+                }
+                print(json.dumps(payload, ensure_ascii=False))
+                return
+
             if extract:
                 code = extract_code_block(output)
                 if code is not None:
@@ -228,6 +268,13 @@ def chat(
         files: Annotated[
             list[str] | None, Option("-f", "--file", help="附加文件内容到 prompt（可多次指定）")
         ] = None,
+        format: Annotated[
+            str,
+            Option(
+                "--format",
+                help="输出格式: text(默认) 或 json(仅单条模式, 多轮模式会报错)",
+            ),
+        ] = "text",
         dry_run: Annotated[bool, Option("--dry-run", help="预览操作内容，不实际执行")] = False,
     ):
         """交互式对话
@@ -245,9 +292,28 @@ def chat(
         代码提取 (-x):  只输出回复中的代码块（仅单条模式）
           flexllm chat "写个 hello world" -x
 
+        JSON 输出 (--format json):  仅单条模式支持
+          flexllm chat "你好" --format json
+
         预览:
           flexllm chat "测试" --dry-run             # 预览请求配置
         """
+        if format not in ("text", "json"):
+            cli_error(
+                ErrorType.INVALID_ARGS,
+                "--format 参数值无效",
+                context={"arg": "--format", "received": format, "expected": ["text", "json"]},
+                suggestion="使用 --format text 或 --format json",
+                doc="flexllm chat --help",
+            )
+        if format == "json" and not message:
+            cli_error(
+                ErrorType.INVALID_ARGS,
+                "--format json 仅支持单条对话模式",
+                context={"mode": "interactive", "message": None},
+                suggestion='提供 message 切到单条模式: flexllm chat "你好" --format json',
+                doc="flexllm chat --help",
+            )
         model, base_url, api_key = resolve_model_config(model, base_url, api_key)
         config = get_config()
 
@@ -319,6 +385,7 @@ def chat(
                 user_template,
                 thinking=resolved_thinking,
                 extract=extract,
+                output_format=format,
             )
         elif not sys.stdin.isatty():
             cli_error(
@@ -700,6 +767,13 @@ def batch(
                 help="结构化输出 (json=JSON模式, @file.json=从文件读取, 或 JSON Schema 字符串)",
             ),
         ] = None,
+        format: Annotated[
+            str,
+            Option(
+                "--format",
+                help="输出格式: text(默认) 或 json(stdout 输出聚合汇总 JSON)",
+            ),
+        ] = "text",
         dry_run: Annotated[bool, Option("--dry-run", help="预览操作内容，不实际执行")] = False,
     ):
         """批量处理 JSONL 文件（支持断点续传）
@@ -732,9 +806,20 @@ def batch(
           flexllm batch data.jsonl -o out.jsonl -uf text -sf sys_prompt
           flexllm batch input.jsonl -n 5               # 只处理前5条（试跑）
 
+        JSON 输出 (--format json):  stdout 输出聚合汇总,方便 agent/脚本解析
+          flexllm batch input.jsonl -o out.jsonl --format json
+
         预览:
           flexllm batch input.jsonl --dry-run       # 预览处理计划
         """
+        if format not in ("text", "json"):
+            cli_error(
+                ErrorType.INVALID_ARGS,
+                "--format 参数值无效",
+                context={"arg": "--format", "received": format, "expected": ["text", "json"]},
+                suggestion="使用 --format text 或 --format json",
+                doc="flexllm batch --help",
+            )
         has_stdin = not sys.stdin.isatty()
         if not input and not has_stdin:
             cli_error(
@@ -1048,9 +1133,33 @@ async def _run_batch():
 
             results, summary = asyncio.run(_run_batch())
 
-            if summary:
-                print(f"\n完成: {summary}", file=sys.stderr)
-            print(f"输出文件: {output}", file=sys.stderr)
+            if format == "json":
+                if isinstance(summary, dict):
+                    summary_payload = summary
+                elif summary is None:
+                    summary_payload = None
+                else:
+                    summary_payload = {"raw": str(summary)}
+                success_count = sum(1 for r in results if r is not None)
+                print(
+                    json.dumps(
+                        {
+                            "input_file": input,
+                            "output_file": output,
+                            "record_count": len(records),
+                            "success_count": success_count,
+                            "failure_count": len(records) - success_count,
+                            "format_type": format_type,
+                            "model": model_id,
+                            "summary": summary_payload,
+                        },
+                        ensure_ascii=False,
+                    )
+                )
+            else:
+                if summary:
+                    print(f"\n完成: {summary}", file=sys.stderr)
+                print(f"输出文件: {output}", file=sys.stderr)
 
         except json.JSONDecodeError as e:
             cli_error(