|
26 | 26 |
|
27 | 27 | ARG PYTHON_VERSION=3.12 |
28 | 28 |
|
29 | | -# Pinned to the rknn-llm git tag whose librkllmrt.so reports the same |
30 | | -# runtime version (1.1.4) the lab device's NPU driver expects. SHAs are |
31 | | -# what raw.githubusercontent.com returns today (2026-05-26) — pinning |
32 | | -# them means a future upstream rewrite of the same tag (rare but possible) |
33 | | -# fails the build instead of silently shipping a different binary. |
| 29 | +# Pinned to release-v1.2.3 (2025-11-24) — first runtime with full |
| 30 | +# Qwen 3 support. The 1.1.4 runtime that shipped previously can't load |
| 31 | +# Qwen3ForCausalLM at all (toolkit returns "Not support Qwen3ForCausalLM!"); |
| 32 | +# 1.2.3 adds Qwen3, function-calling, thinking-mode chat template |
| 33 | +# parsing, multi-batch inference + the GRQ Int4 quantization optimizer. |
| 34 | +# Toolkit and runtime MUST match — if a future model is converted with |
| 35 | +# a newer toolkit, bump this version (plus both URLs and their SHA256 pins) + rebuild the image. |
| 36 | +# |
| 37 | +# Migration history: |
| 38 | +# - release-v1.1.4: original (Qwen 2.5 3B then 1.5B W8A8) |
| 39 | +# - release-v1.2.3: current (Qwen 3 1.7B W8A8 with thinking mode) |
| 40 | +# |
| 41 | +# librknnrt directory rename in 1.2.x: examples/rkllm_multimodel_demo/ |
| 42 | +# became examples/multimodal_model_demo/ (note "rkllm_" prefix dropped, |
| 43 | +# "multimodel" -> "multimodal_model"). Both .so files are AARCH64-only; on x86 |
| 44 | +# builds the rkllm-libs stage is a no-op + MockBackend takes over. |
34 | 45 | # |
35 | 46 | # Verified end-to-end on lab pi@192.168.2.159 (RK3588, Armbian, kernel |
36 | | -# 6.1.115-vendor-rk35xx): rkllm_init succeeds, real Qwen 2.5 3B streaming |
37 | | -# through /troubleshoot works. |
38 | | -ARG RKLLM_VERSION=release-v1.1.4 |
39 | | -ARG RKLLMRT_URL=https://raw.githubusercontent.com/airockchip/rknn-llm/release-v1.1.4/rkllm-runtime/Linux/librkllm_api/aarch64/librkllmrt.so |
40 | | -ARG RKLLMRT_SHA256=3cef353105c3bfd31f99c4963fce8498d2fac633d845633c904f523b7c3bcd0a |
41 | | -ARG RKNNRT_URL=https://raw.githubusercontent.com/airockchip/rknn-llm/release-v1.1.4/examples/rkllm_multimodel_demo/deploy/3rdparty/librknnrt/Linux/librknn_api/aarch64/librknnrt.so |
42 | | -ARG RKNNRT_SHA256=1170e5f99f2db7ed4d3a4c2bdbed941b7363bd090e0c28b4e210f40614327911 |
| 47 | +# 6.1.115-vendor-rk35xx): rkllm_init succeeds with 1.2.3 ABI; Qwen3 1.7B |
| 48 | +# W8A8 streaming through /troubleshoot works. |
| 49 | +ARG RKLLM_VERSION=release-v1.2.3 |
| 50 | +ARG RKLLMRT_URL=https://raw.githubusercontent.com/airockchip/rknn-llm/release-v1.2.3/rkllm-runtime/Linux/librkllm_api/aarch64/librkllmrt.so |
| 51 | +ARG RKLLMRT_SHA256=bbcf28a8666b9fbf7361d6aad892b957920f6ea92400c074899b48f4c5b2c96f |
| 52 | +ARG RKNNRT_URL=https://raw.githubusercontent.com/airockchip/rknn-llm/release-v1.2.3/examples/multimodal_model_demo/deploy/3rdparty/librknnrt/Linux/librknn_api/aarch64/librknnrt.so |
| 53 | +ARG RKNNRT_SHA256=d31fc19c85b85f6091b2bd0f6af9d962d5264a4e410bfb536402ec92bac738e8 |
43 | 54 |
|
44 | 55 | # --------------------------------------------------------------------------- |
45 | 56 | # Stage 0: fetch Rockchip RKLLM runtime binaries (arm64 only). |
|
0 commit comments