Mq-b
diff --git a/‎README.md‎
Lines changed: 48 additions & 0 deletions b/‎README.md‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎models/cat_vs_dog/best.infer_params.json‎
Lines changed: 120 additions & 0 deletions b/‎models/cat_vs_dog/best.infer_params.json‎
Lines changed: 120 additions & 0 deletions
diff --git a/‎py/inspect_onnx_for_cpp.py‎
Lines changed: 219 additions & 0 deletions b/‎py/inspect_onnx_for_cpp.py‎
Lines changed: 219 additions & 0 deletions
@@ -117,6 +117,54 @@ cmake --build build
    点击 **加载文件夹** → 程序会递归收集子文件夹中的图片 → 点击 **批量推理全部**
 5. 可点击下方结果列表中的任意项，快速切回对应图片查看结果
 
+## 参数对齐记录（2026-04-25）
+
+本次主要做了“Python YOLO 推理”和“C++ ORT 推理”的参数对齐，重点如下：
+
+- 新增脚本 [`py/inspect_onnx_for_cpp.py`](./py/inspect_onnx_for_cpp.py)，可从 ONNX 读取 metadata（`task/imgsz/names/args`）并导出参数文件（如 `best.infer_params.json`）。
+- C++ 侧优化了分类前处理细节（短边缩放取整、中心裁剪取整、下采样插值策略），以更接近 Ultralytics 分类默认流程。
+- C++ 侧增加了 ONNX metadata 的读取与类别名解析（`names`）。
+
+### `best.infer_params.json` 当前作用
+
+`best.infer_params.json` 目前是**对齐/排查用报告文件**，用于查看模型导出参数并和 C++ 实现核对。  
+当前 C++ 推理流程**不会自动读取**这个 JSON 文件。
+
+`inspect_onnx_for_cpp.py` 使用示例：
+
+```bash
+# 在仓库根目录执行（以猫狗模型为例）
+# 以下为 bash 写法；在 Windows cmd 中请把行尾的 `\` 换成 `^`
+python py/inspect_onnx_for_cpp.py \
+  --model models/cat_vs_dog/best.onnx \
+  --image assets/val/dog/dog_21.jpg \
+  --out models/cat_vs_dog/best.infer_params.json
+```
+
+执行后会在终端打印解析结果，并写出 `models/cat_vs_dog/best.infer_params.json`。
+
+### C++ 当前类别名查找顺序
+
+在选择 `.onnx` 模型后，C++ 按下面顺序找类别名：
+
+1. 模型同级目录：`labels.txt`，找不到再找 `class_names.txt`
+2. ONNX metadata：读取 `names`
+3. 兼容回退：若模型目录名是 `cat_vs_dog`，使用 `cat/dog`
+4. 以上都没有时，显示 `class_N`
+
+对应代码位置：
+
+- [`MainWindow.cpp`](./src/MainWindow.cpp) 的 `selectModel()`
+- [`OnnxClassifier.cpp`](./src/OnnxClassifier.cpp) 的 `loadModel()` 与 `modelClassNames()`
+
+### 对猫狗模型和试剂模型的建议
+
+两个模型都建议各自放在独立目录，并至少满足以下条件之一：
+
+- 在模型目录中放置 `labels.txt`（推荐）
+- 或确保导出的 ONNX metadata 中带有 `names`
+
+这样 C++ 在切换猫狗模型/试剂模型时会自动加载对应类别名，避免串类。
+
 ## 模型来源
 
 模型由 [`py`](./py/) 目录中的 Python 脚本基于 YOLO 训练的分类模型导出，训练脚本示例：
 
@@ -0,0 +1,120 @@
+{
+  "model_path": "E:\\YOLO\\python-train-cpp-infer-demo\\models\\cat_vs_dog\\best.onnx",
+  "io": {
+    "input_name": "images",
+    "input_shape": [
+      1,
+      3,
+      224,
+      224
+    ],
+    "input_type": "tensor(float)",
+    "output_name": "output0",
+    "output_shape": [
+      1,
+      2
+    ],
+    "output_type": "tensor(float)"
+  },
+  "metadata_raw": {
+    "date": "2026-04-06T10:48:26.236324",
+    "description": "Ultralytics YOLO11n-cls model trained on assets",
+    "author": "Ultralytics",
+    "version": "8.4.33",
+    "task": "classify",
+    "license": "AGPL-3.0 License (https://ultralytics.com/license)",
+    "docs": "https://docs.ultralytics.com",
+    "stride": "1",
+    "batch": "1",
+    "imgsz": "[224, 224]",
+    "names": "{0: 'cat', 1: 'dog'}",
+    "args": "{'batch': 1, 'half': False, 'dynamic': False, 'simplify': True, 'opset': None, 'nms': False}",
+    "channels": "3",
+    "end2end": "False"
+  },
+  "metadata_parsed": {
+    "date": "2026-04-06T10:48:26.236324",
+    "description": "Ultralytics YOLO11n-cls model trained on assets",
+    "author": "Ultralytics",
+    "version": "8.4.33",
+    "task": "classify",
+    "license": "AGPL-3.0 License (https://ultralytics.com/license)",
+    "docs": "https://docs.ultralytics.com",
+    "stride": 1,
+    "batch": 1,
+    "imgsz": [
+      224,
+      224
+    ],
+    "names": {
+      "0": "cat",
+      "1": "dog"
+    },
+    "args": {
+      "batch": 1,
+      "half": false,
+      "dynamic": false,
+      "simplify": true,
+      "opset": null,
+      "nms": false
+    },
+    "channels": 3,
+    "end2end": false
+  },
+  "cpp_recommended_config": {
+    "task": "classify",
+    "imgsz_hw": [
+      224,
+      224
+    ],
+    "layout": "NCHW",
+    "color_order": "RGB",
+    "pixel_range": "[0, 1]",
+    "normalize_mean": [
+      0.0,
+      0.0,
+      0.0
+    ],
+    "normalize_std": [
+      1.0,
+      1.0,
+      1.0
+    ],
+    "resize_rule": "short_edge_to_target_then_center_crop",
+    "resize_long_edge_rounding": "floor(int(target * long / short))",
+    "center_crop_rounding": "round((resized - target) / 2)",
+    "interpolation_hint": "PIL.BILINEAR in YOLO; C++ approximate: INTER_AREA when downsample else INTER_LINEAR",
+    "softmax_hint": "Do not add softmax again if model output already sums close to 1.",
+    "class_names": [
+      "cat",
+      "dog"
+    ]
+  },
+  "probe": {
+    "image_path": "E:\\YOLO\\python-train-cpp-infer-demo\\assets\\val\\dog\\dog_21.jpg",
+    "cpp_style": {
+      "shape": [
+        1,
+        2
+      ],
+      "sum": 0.9999999701976776,
+      "min": 0.29744085669517517,
+      "max": 0.7025591135025024,
+      "top1_index": 1,
+      "top1_score": 0.7025591135025024
+    },
+    "ultralytics_reference_style": {
+      "shape": [
+        1,
+        2
+      ],
+      "sum": 1.0000000298023224,
+      "min": 0.41840896010398865,
+      "max": 0.5815910696983337,
+      "top1_index": 1,
+      "top1_score": 0.5815910696983337
+    },
+    "top1_same": true,
+    "top1_score_abs_diff": 0.1209680438041687
+  }
+}
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+import argparse
+import ast
+import json
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+from PIL import Image
+
+
+def _safe_literal(value: Any) -> Any:
+    if not isinstance(value, str):
+        return value
+    try:
+        return ast.literal_eval(value)
+    except Exception:
+        return value
+
+
+def _normalize_names(names_value: Any) -> list[str]:
+    if isinstance(names_value, dict):
+        pairs: list[tuple[int, str]] = []
+        for key, value in names_value.items():
+            try:
+                idx = int(key)
+            except Exception:
+                continue
+            pairs.append((idx, str(value)))
+        if not pairs:
+            return []
+        pairs.sort(key=lambda item: item[0])
+        max_index = pairs[-1][0]
+        out = [f"class_{i}" for i in range(max_index + 1)]
+        for idx, name in pairs:
+            out[idx] = name
+        return out
+    if isinstance(names_value, (list, tuple)):
+        return [str(x) for x in names_value]
+    return []
+
+
+def _parse_imgsz(imgsz_value: Any, input_shape: list[Any]) -> list[int]:
+    if isinstance(imgsz_value, (list, tuple)) and len(imgsz_value) == 2:
+        try:
+            return [int(imgsz_value[0]), int(imgsz_value[1])]
+        except Exception:
+            pass
+
+    if len(input_shape) == 4:
+        h = input_shape[2]
+        w = input_shape[3]
+        if isinstance(h, int) and isinstance(w, int):
+            return [h, w]
+    return [224, 224]
+
+
+def _bgr_to_chw_float01(arr_bgr: np.ndarray) -> np.ndarray:
+    arr_rgb = cv2.cvtColor(arr_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+    return np.transpose(arr_rgb, (2, 0, 1))[None]
+
+
+def preprocess_cpp_style(arr_bgr: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
+    h, w = arr_bgr.shape[:2]
+    scale = max(target_w / float(w), target_h / float(h))
+    resized_w = max(target_w, int(np.floor(w * scale)))
+    resized_h = max(target_h, int(np.floor(h * scale)))
+    interpolation = cv2.INTER_AREA if resized_w < w or resized_h < h else cv2.INTER_LINEAR
+    resized = cv2.resize(arr_bgr, (resized_w, resized_h), interpolation=interpolation)
+
+    crop_x = max(0, int(np.rint((resized_w - target_w) / 2.0)))
+    crop_y = max(0, int(np.rint((resized_h - target_h) / 2.0)))
+    cropped = resized[crop_y : crop_y + target_h, crop_x : crop_x + target_w]
+    return _bgr_to_chw_float01(cropped)
+
+
+def preprocess_ultralytics_reference(arr_bgr: np.ndarray, target_h: int, target_w: int) -> np.ndarray:
+    arr_rgb = cv2.cvtColor(arr_bgr, cv2.COLOR_BGR2RGB)
+    img = Image.fromarray(arr_rgb)
+    src_w, src_h = img.size
+
+    if src_w <= src_h:
+        resized_w = target_w
+        resized_h = max(1, int(target_w * src_h / src_w))
+    else:
+        resized_h = target_h
+        resized_w = max(1, int(target_h * src_w / src_h))
+
+    img = img.resize((resized_w, resized_h), resample=Image.BILINEAR)
+    crop_x = int(round((resized_w - target_w) / 2.0))
+    crop_y = int(round((resized_h - target_h) / 2.0))
+    img = img.crop((crop_x, crop_y, crop_x + target_w, crop_y + target_h))
+
+    arr = np.asarray(img, dtype=np.float32) / 255.0
+    return np.transpose(arr, (2, 0, 1))[None]
+
+
+def run_probe(session: ort.InferenceSession, input_name: str, tensor: np.ndarray) -> dict[str, Any]:
+    output = session.run(None, {input_name: tensor})[0]
+    logits = np.asarray(output).reshape(-1).astype(np.float64)
+    top1_idx = int(np.argmax(logits))
+    return {
+        "shape": list(np.asarray(output).shape),
+        "sum": float(logits.sum()),
+        "min": float(logits.min()),
+        "max": float(logits.max()),
+        "top1_index": top1_idx,
+        "top1_score": float(logits[top1_idx]),
+    }
+
+
+def inspect_model(model_path: Path, image_path: Path | None = None) -> dict[str, Any]:
+    session = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
+    input0 = session.get_inputs()[0]
+    output0 = session.get_outputs()[0]
+    metadata_raw = dict(session.get_modelmeta().custom_metadata_map or {})
+    metadata = {k: _safe_literal(v) for k, v in metadata_raw.items()}
+    names = _normalize_names(metadata.get("names"))
+    imgsz = _parse_imgsz(metadata.get("imgsz"), list(input0.shape))
+
+    result: dict[str, Any] = {
+        "model_path": str(model_path.resolve()),
+        "io": {
+            "input_name": input0.name,
+            "input_shape": list(input0.shape),
+            "input_type": input0.type,
+            "output_name": output0.name,
+            "output_shape": list(output0.shape),
+            "output_type": output0.type,
+        },
+        "metadata_raw": metadata_raw,
+        "metadata_parsed": metadata,
+        "cpp_recommended_config": {
+            "task": metadata.get("task", "classify"),
+            "imgsz_hw": imgsz,
+            "layout": "NCHW",
+            "color_order": "RGB",
+            "pixel_range": "[0, 1]",
+            "normalize_mean": [0.0, 0.0, 0.0],
+            "normalize_std": [1.0, 1.0, 1.0],
+            "resize_rule": "short_edge_to_target_then_center_crop",
+            "resize_long_edge_rounding": "floor(int(target * long / short))",
+            "center_crop_rounding": "round((resized - target) / 2)",
+            "interpolation_hint": "PIL.BILINEAR in YOLO; C++ approximate: INTER_AREA when downsample else INTER_LINEAR",
+            "softmax_hint": "Do not add softmax again if model output already sums close to 1.",
+            "class_names": names,
+        },
+    }
+
+    if image_path is not None:
+        buf = np.fromfile(str(image_path), dtype=np.uint8)
+        arr_bgr = cv2.imdecode(buf, cv2.IMREAD_COLOR)
+        if arr_bgr is None:
+            raise RuntimeError(f"Failed to read image: {image_path}")
+
+        target_h, target_w = imgsz
+        cpp_tensor = preprocess_cpp_style(arr_bgr, target_h, target_w)
+        yolo_tensor = preprocess_ultralytics_reference(arr_bgr, target_h, target_w)
+        probe_cpp = run_probe(session, input0.name, cpp_tensor)
+        probe_yolo = run_probe(session, input0.name, yolo_tensor)
+
+        result["probe"] = {
+            "image_path": str(image_path.resolve()),
+            "cpp_style": probe_cpp,
+            "ultralytics_reference_style": probe_yolo,
+            "top1_same": probe_cpp["top1_index"] == probe_yolo["top1_index"],
+            "top1_score_abs_diff": abs(probe_cpp["top1_score"] - probe_yolo["top1_score"]),
+        }
+
+    return result
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Inspect Ultralytics ONNX metadata and export C++ inference parameters."
+    )
+    parser.add_argument(
+        "--model",
+        type=Path,
+        default=Path("models/cat_vs_dog/best.onnx"),
+        help="Path to ONNX model.",
+    )
+    parser.add_argument(
+        "--image",
+        type=Path,
+        default=None,
+        help="Optional image path for probe inference comparison.",
+    )
+    parser.add_argument(
+        "--out",
+        type=Path,
+        default=None,
+        help="Output JSON path (default: <model>.infer_params.json).",
+    )
+    parser.add_argument(
+        "--print-only",
+        action="store_true",
+        help="Only print JSON, do not write file.",
+    )
+    args = parser.parse_args()
+
+    payload = inspect_model(args.model, args.image)
+    output_text = json.dumps(payload, ensure_ascii=False, indent=2)
+    print(output_text)
+
+    if args.print_only:
+        return
+
+    out_path = args.out or args.model.with_suffix(".infer_params.json")
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(output_text + "\n", encoding="utf-8")
+    print(f"\nSaved: {out_path.resolve()}")
+
+
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()