RunanywhereAI
diff --git a/‎.github/workflows/pr-build.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/pr-build.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.syncpackrc.json‎
Lines changed: 3 additions & 8 deletions b/‎.syncpackrc.json‎
Lines changed: 3 additions & 8 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 10 additions & 16 deletions b/‎CMakeLists.txt‎
Lines changed: 10 additions & 16 deletions
diff --git a/‎engines/llamacpp/CMakeLists.txt‎
Lines changed: 52 additions & 101 deletions b/‎engines/llamacpp/CMakeLists.txt‎
Lines changed: 52 additions & 101 deletions
diff --git a/‎engines/llamacpp/jni/rac_backend_llamacpp_jni.cpp‎
Lines changed: 7 additions & 50 deletions b/‎engines/llamacpp/jni/rac_backend_llamacpp_jni.cpp‎
Lines changed: 7 additions & 50 deletions
@@ -38,7 +38,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v4
-        with: { node-version: '20' }
+        with: { node-version: '22' }
       - name: TypeScript / Web / RN centralization (syncpack)
         run: bash scripts/validation/check_typescript_centralization.sh
       - name: Flutter dependency centralization
@@ -256,7 +256,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v4
-        with: { node-version: '20' }
+        with: { node-version: '22' }
       # Repo root and sdk/runanywhere-react-native pin `packageManager: yarn@3.6.1`
       # (Yarn Berry). The runner's global yarn is 1.x and refuses to run, so we
       # activate the pinned version through Corepack before any `yarn` call.
@@ -286,7 +286,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v4
-        with: { node-version: '20' }
+        with: { node-version: '22' }
       # Install + typecheck from the workspace root so the in-tree
       # @runanywhere/proto-ts package (sdk/shared/proto-ts) is resolved as a
       # workspace member instead of being fetched from the npm registry. The
 
@@ -14,14 +14,15 @@
   "dependencyTypes": ["dev", "prod", "peer"],
   "versionGroups": [
     {
-      "label": "Local workspace packages: ignore (managed by workspaces protocol / peer ranges). NOTE: @runanywhere/proto-ts is a published npm package centralized in dependencies/versions.json, so it is NOT ignored here — see its own pin below.",
+      "label": "Local workspace packages: ignore (managed by workspaces protocol / peer ranges). @runanywhere/proto-ts lives in sdk/shared/proto-ts and is consumed via workspace:* by all TS SDK packages — it is not published to npm.",
       "dependencies": [
         "@runanywhere/core",
         "@runanywhere/llamacpp",
         "@runanywhere/onnx",
         "@runanywhere/web",
         "@runanywhere/web-llamacpp",
-        "@runanywhere/web-onnx"
+        "@runanywhere/web-onnx",
+        "@runanywhere/proto-ts"
       ],
       "isIgnored": true
     },
@@ -91,11 +92,6 @@
       "dependencies": ["vite"],
       "pinVersion": "^6.4.2"
     },
-    {
-      "label": "Pinned: @runanywhere/proto-ts (centralized in dependencies/versions.json) — published npm package, schema-sensitive lockstep with all TS SDK packages",
-      "dependencies": ["@runanywhere/proto-ts"],
-      "pinVersion": "^0.21.0"
-    },
     {
       "label": "Pinned: react-native (centralized in dependencies/versions.json) — Hermes/JSI/NitroModules ABI tracks this version",
       "dependencies": ["react-native"],
@@ -139,7 +135,6 @@
         "prettier",
         "@playwright/test",
         "vite",
-        "@runanywhere/proto-ts",
         "@eslint/js",
         "tsd"
       ],
 
@@ -88,19 +88,22 @@ set_property(CACHE RAC_SANITIZER PROPERTY STRINGS "" asan tsan ubsan)
 # final values before their CMakeLists run. The wasm wrapper target itself
 # lives under sdk/runanywhere-web/wasm and is added after the engine targets.
 if(EMSCRIPTEN)
-    option(RAC_WASM_PTHREADS   "Enable pthreads in the WebAssembly target" OFF)
-    option(RAC_WASM_DEBUG      "Enable debug assertions/logging in the WebAssembly target" OFF)
-    option(RAC_WASM_LLAMACPP   "Build the WebAssembly target with llama.cpp" OFF)
-    option(RAC_WASM_VLM        "Enable llama.cpp mtmd VLM support in the WebAssembly target" OFF)
-    option(RAC_WASM_WHISPERCPP "Build the WebAssembly target with whisper.cpp" OFF)
-    option(RAC_WASM_ONNX       "Build the WebAssembly target with ONNX/sherpa support" OFF)
-    option(RAC_WASM_WEBGPU     "Enable WebGPU acceleration for the WebAssembly target" OFF)
+    option(RAC_WASM_PTHREADS    "Enable pthreads in the WebAssembly target" OFF)
+    option(RAC_WASM_DEBUG       "Enable debug assertions/logging in the WebAssembly target" OFF)
+    option(RAC_WASM_BUILD_CORE  "Build the commons-only Web SDK WASM target" OFF)
+    option(RAC_WASM_LLAMACPP    "Build the WebAssembly target with llama.cpp (LLM + VLM)" OFF)
+    option(RAC_WASM_WHISPERCPP  "Build the WebAssembly target with whisper.cpp" OFF)
+    option(RAC_WASM_ONNX        "Build the WebAssembly target with ONNX/sherpa support" OFF)
+    option(RAC_WASM_WEBGPU      "Enable WebGPU acceleration for the WebAssembly target" OFF)
 
     set(RAC_BUILD_JNI OFF CACHE BOOL "" FORCE)
     set(RAC_BUILD_TESTS OFF CACHE BOOL "" FORCE)
     set(RAC_BUILD_SHARED OFF CACHE BOOL "" FORCE)
     set(RAC_BUILD_PLATFORM OFF CACHE BOOL "" FORCE)
 
+    # Backends follow the union of requested WASM targets. The core-only target
+    # never links backend archives, so a configure that requests ONLY core
+    # leaves backends off (commons compiles standalone).
     if(RAC_WASM_LLAMACPP OR RAC_WASM_WHISPERCPP OR RAC_WASM_ONNX)
         set(RAC_BUILD_BACKENDS ON CACHE BOOL "" FORCE)
     else()
@@ -111,15 +114,6 @@ if(EMSCRIPTEN)
     set(RAC_BACKEND_WHISPERCPP ${RAC_WASM_WHISPERCPP} CACHE BOOL "" FORCE)
     set(RAC_BACKEND_ONNX ${RAC_WASM_ONNX} CACHE BOOL "" FORCE)
 
-    if(RAC_WASM_VLM AND RAC_WASM_LLAMACPP)
-        set(RAC_VLM_USE_MTMD ON CACHE BOOL "" FORCE)
-    else()
-        if(RAC_WASM_VLM AND NOT RAC_WASM_LLAMACPP)
-            message(WARNING "RAC_WASM_VLM requires RAC_WASM_LLAMACPP=ON; disabling VLM.")
-        endif()
-        set(RAC_VLM_USE_MTMD OFF CACHE BOOL "" FORCE)
-    endif()
-
     if(RAC_WASM_LLAMACPP OR RAC_WASM_WHISPERCPP)
         set(GGML_METAL OFF CACHE BOOL "" FORCE)
         set(GGML_VULKAN OFF CACHE BOOL "" FORCE)
 
@@ -146,68 +146,55 @@ set(LLAMACPP_BACKEND_SOURCES
     llamacpp_backend.cpp
     rac_llm_llamacpp.cpp
     rac_backend_llamacpp_register.cpp
-    # GAP 02 Phase 8: unified engine plugin entry point. Coexists with the
-    # legacy rac_backend_llamacpp_register() bootstrap; both wrap the same
-    # ops-struct.
+    # Unified ABI plugin entry — fills both llm_ops and vlm_ops slots.
     rac_plugin_entry_llamacpp.cpp
+    # VLM Backend (vision-language as a modality of the same engine).
+    rac_vlm_llamacpp.cpp
+    # VLM ops vtable (the g_llamacpp_vlm_ops struct consumed by the unified
+    # plugin entry). Extracted from the (now-deleted) VLM register file when
+    # the two plugin entries were unified.
+    rac_llamacpp_vlm_ops.cpp
 )
 
 set(LLAMACPP_BACKEND_HEADERS
     llamacpp_backend.h
 )
 
-# Option to enable VLM multimodal support (requires mtmd from llama.cpp)
-option(RAC_VLM_USE_MTMD "Enable VLM multimodal support via llama.cpp mtmd" ON)
-if(RAC_VLM_USE_MTMD)
-    message(STATUS "VLM multimodal support enabled")
-    # Add VLM Backend sources (Vision Language Model)
-    list(APPEND LLAMACPP_BACKEND_SOURCES
-        rac_vlm_llamacpp.cpp
-        rac_backend_llamacpp_vlm_register.cpp
-        # GAP 02 Phase 8: unified engine plugin entry point for VLM.
-        rac_plugin_entry_llamacpp_vlm.cpp
-    )
-    # Add mtmd sources from llama.cpp tools directory. mtmd-image.cpp
-    # carries the `mtmd_image_preprocessor_*` vtables that mtmd.cpp
-    # references during `mtmd_context::init_vision()`; without it ld.lld
-    # on Linux/Android complains:
-    #   ld.lld: error: undefined symbol: vtable for
-    #     mtmd_image_preprocessor_{llava_uhd,step3vl,idefics3,internvl}
-    list(APPEND LLAMACPP_BACKEND_SOURCES
-        ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd.cpp
-        ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd-helper.cpp
-        ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd-audio.cpp
-        ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd-image.cpp
-        ${llamacpp_SOURCE_DIR}/tools/mtmd/clip.cpp
-    )
-    # Glob every mtmd model implementation upstream ships. The previous
-    # hand-curated list drifts each llama.cpp bump (b9180 adds
-    # deepseekocr / dotsocr / gemma4a / gemma4v / granite-speech /
-    # hunyuanocr / mimovl / qwen3a / step3vl / yasa2 over what we
-    # listed). On Linux/Android the resulting clip_image_build_graph
-    # references unconditionally need every clip_graph_<name>::* vtable
-    # to be present at link time, otherwise ld.lld bails with
-    #   ld.lld: error: undefined symbol: vtable for clip_graph_<name>
-    # `CONFIGURE_DEPENDS` re-globs when the directory changes.
-    #
-    # pass2-syn-063: CONFIGURE_DEPENDS is best-effort on Visual Studio /
-    # Xcode generators (CMake docs: "a generator may not implement this
-    # feature"). All RAC CMakePresets that build this engine use Ninja
-    # (macos-debug, macos-release, linux-asan, android-arm64, ios-*),
-    # so re-glob fires reliably for our CI matrix. Developers who run an
-    # Xcode-driven CMake build of the C++ engine for direct on-device
-    # profiling MUST `cmake --reconfigure` after an upstream llama.cpp
-    # bump that adds a new tools/mtmd/models/*.cpp file; otherwise the
-    # build fails at link time with an undefined vtable for
-    # clip_graph_<new_model>. The STATUS message below prints the
-    # discovered count to make a silent drop loud during local builds.
-    file(GLOB _RAC_MTMD_MODEL_SOURCES CONFIGURE_DEPENDS
-        "${llamacpp_SOURCE_DIR}/tools/mtmd/models/*.cpp"
-    )
-    list(LENGTH _RAC_MTMD_MODEL_SOURCES _RAC_MTMD_MODEL_COUNT)
-    message(STATUS "VLM mtmd model implementations discovered: ${_RAC_MTMD_MODEL_COUNT} (llama.cpp ${LLAMACPP_VERSION})")
-    list(APPEND LLAMACPP_BACKEND_SOURCES ${_RAC_MTMD_MODEL_SOURCES})
-endif()
+# VLM multimodal support is always compiled in — llama.cpp is one engine
+# that supports both LLM and VLM modalities. The two are exposed through
+# a single unified plugin vtable, not two separate plugins.
+#
+# Add mtmd sources from llama.cpp tools directory. mtmd-image.cpp carries
+# the `mtmd_image_preprocessor_*` vtables that mtmd.cpp references during
+# `mtmd_context::init_vision()`; without it ld.lld on Linux/Android complains:
+#   ld.lld: error: undefined symbol: vtable for
+#     mtmd_image_preprocessor_{llava_uhd,step3vl,idefics3,internvl}
+list(APPEND LLAMACPP_BACKEND_SOURCES
+    ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd.cpp
+    ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd-helper.cpp
+    ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd-audio.cpp
+    ${llamacpp_SOURCE_DIR}/tools/mtmd/mtmd-image.cpp
+    ${llamacpp_SOURCE_DIR}/tools/mtmd/clip.cpp
+)
+
+# Glob every mtmd model implementation upstream ships. A hand-curated list
+# drifts with each llama.cpp bump (b9180 adds deepseekocr / dotsocr /
+# gemma4a / gemma4v / granite-speech / hunyuanocr / mimovl / qwen3a / step3vl /
+# yasa2, etc.). On Linux/Android, clip_image_build_graph unconditionally
+# references every clip_graph_<name>::* vtable, so all of them must be present
+# at link time; otherwise ld.lld bails with
+#   ld.lld: error: undefined symbol: vtable for clip_graph_<name>
+# `CONFIGURE_DEPENDS` re-globs when the directory changes; it is best-effort
+# on Visual Studio / Xcode generators but reliable on Ninja (which RAC's
+# CMakePresets use). Developers running an Xcode-driven CMake build for
+# on-device profiling MUST re-run the CMake configure step after an upstream
+# llama.cpp bump that adds a new tools/mtmd/models/*.cpp file.
+file(GLOB _RAC_MTMD_MODEL_SOURCES CONFIGURE_DEPENDS
+    "${llamacpp_SOURCE_DIR}/tools/mtmd/models/*.cpp"
+)
+list(LENGTH _RAC_MTMD_MODEL_SOURCES _RAC_MTMD_MODEL_COUNT)
+message(STATUS "VLM mtmd model implementations discovered: ${_RAC_MTMD_MODEL_COUNT} (llama.cpp ${LLAMACPP_VERSION})")
+list(APPEND LLAMACPP_BACKEND_SOURCES ${_RAC_MTMD_MODEL_SOURCES})
 
 # GAP 06 close-out (Phase 1 / B1): migrated to rac_add_engine_plugin().
 #
@@ -257,41 +244,32 @@ rac_add_engine_plugin(llamacpp
                         ${llamacpp_SOURCE_DIR}/ggml/include
                         ${llamacpp_SOURCE_DIR}/vendor # nlohmann/json.hpp
     COMPILE_DEFINITIONS RAC_LLAMACPP_BUILDING
-    PRIMITIVES          GENERATE_TEXT
+    PRIMITIVES          GENERATE_TEXT VLM
     RUNTIMES            CPU METAL CUDA
     FORMATS             GGUF GGML BIN
     AVAILABILITY        PUBLIC
     PACKAGE_OWNER       runanywhere
     PACKAGE_NAME        runanywhere_llamacpp
 )
 
-# VLM multimodal mtmd includes/defs — engine-specific, applied after the macro.
-if(RAC_VLM_USE_MTMD)
-    target_include_directories(rac_backend_llamacpp PRIVATE
-        ${llamacpp_SOURCE_DIR}/tools/mtmd
-        ${llamacpp_SOURCE_DIR}/tools/mtmd/models
-    )
-    target_compile_definitions(rac_backend_llamacpp PRIVATE RAC_VLM_USE_MTMD=1)
-endif()
+# VLM multimodal mtmd includes — engine-specific, applied after the macro.
+# mtmd is always compiled in (one engine, two modalities).
+target_include_directories(rac_backend_llamacpp PRIVATE
+    ${llamacpp_SOURCE_DIR}/tools/mtmd
+    ${llamacpp_SOURCE_DIR}/tools/mtmd/models
+)
 
 # Self-registration carrier: thin shim that wraps RAC_STATIC_PLUGIN_REGISTER.
+# Since llama.cpp is one engine with both LLM and VLM modalities exposed by
+# the same plugin vtable, there is a single static-register shim and a single
+# SHARED carrier (no separate _vlm variants).
 if(RAC_STATIC_PLUGINS)
     # Static path: ctor lands inside rac_commons so it runs before main().
     # rac_commons PUBLIC-links rac_backend_llamacpp below so the entry
     # symbol resolves at link time.
     target_sources(rac_commons PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}/rac_static_register_llamacpp.cpp
     )
-    if(RAC_VLM_USE_MTMD)
-        # Swift-only E2E Phase 6e: VLM plugin needs its own static-register
-        # ctor to be routable via rac_plugin_route(framework=llamacpp,
-        # primitive=vlm). Without this, only the LLM plugin registers at
-        # process start and VLM loads fail with
-        # "no backend route for llamacpp_vlm".
-        target_sources(rac_commons PRIVATE
-            ${CMAKE_CURRENT_SOURCE_DIR}/rac_static_register_llamacpp_vlm.cpp
-        )
-    endif()
     target_include_directories(rac_commons PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}
         ${RAC_COMMONS_ROOT_DIR}/include
@@ -313,33 +291,6 @@ else()
     )
     rac_apply_android_page_alignment(runanywhere_llamacpp)
     install(TARGETS runanywhere_llamacpp LIBRARY DESTINATION lib)
-
-    if(RAC_VLM_USE_MTMD)
-        # Parallel SHARED carrier for the VLM vtable so dynamic-plugin hosts
-        # can route RAC_PRIMITIVE_VLM (name="llamacpp_vlm") via
-        # `rac_registry_load_plugin(librunanywhere_llamacpp_vlm.*)`. The
-        # dlopen loader derives the entry-symbol name from the library
-        # filename (see plugin_loader.cpp::entry_symbol_from_path), so the
-        # carrier's basename must match `runanywhere_llamacpp_vlm` to map to
-        # `rac_plugin_entry_llamacpp_vlm`. Without this, a host that does NOT
-        # call `rac_backend_llamacpp_vlm_register()` directly (the Swift /
-        # Kotlin / RN bridge path) cannot reach the VLM plugin even though
-        # the implementation is linked into rac_backend_llamacpp.
-        add_library(runanywhere_llamacpp_vlm SHARED
-            ${CMAKE_CURRENT_SOURCE_DIR}/rac_static_register_llamacpp_vlm.cpp
-        )
-        set_target_properties(runanywhere_llamacpp_vlm PROPERTIES
-            OUTPUT_NAME           runanywhere_llamacpp_vlm
-            C_VISIBILITY_PRESET   hidden
-            CXX_VISIBILITY_PRESET hidden
-        )
-        target_link_libraries(runanywhere_llamacpp_vlm PUBLIC
-            rac_backend_llamacpp
-            rac_commons
-        )
-        rac_apply_android_page_alignment(runanywhere_llamacpp_vlm)
-        install(TARGETS runanywhere_llamacpp_vlm LIBRARY DESTINATION lib)
-    endif()
 endif()
 
 # =============================================================================
 
@@ -30,11 +30,11 @@ static const char *LOG_TAG = "JNI.LlamaCpp";
 #define LOGe(...) RAC_LOG_ERROR(LOG_TAG, __VA_ARGS__)
 #define LOGw(...) RAC_LOG_WARNING(LOG_TAG, __VA_ARGS__)
 
-// Forward declaration for registration functions
+// Forward declarations for the unified register/unregister pair. After the
+// LLM/VLM plugin unification, llama.cpp publishes ONE plugin that fills
+// both `llm_ops` and `vlm_ops` slots; there is no separate VLM register.
 extern "C" rac_result_t rac_backend_llamacpp_register(void);
 extern "C" rac_result_t rac_backend_llamacpp_unregister(void);
-extern "C" rac_result_t rac_backend_llamacpp_vlm_register(void);
-extern "C" rac_result_t rac_backend_llamacpp_vlm_unregister(void);
 
 extern "C" {
 
@@ -118,56 +118,13 @@ Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeGetVersion(
   return env->NewStringUTF(RAC_LLAMACPP_VERSION);
 }
 
-// =============================================================================
-// VLM Backend Registration
-// =============================================================================
-
-/**
- * Register the LlamaCPP VLM backend with the C++ service registry.
- * Mirrors iOS LlamaCPP.registerVLM() pattern.
- */
-JNIEXPORT jint JNICALL
-Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeRegisterVlm(
-    JNIEnv *env, jclass clazz) {
-  (void)env;
-  (void)clazz;
-  LOGi("LlamaCPP nativeRegisterVlm called");
-
-  rac_result_t result = rac_backend_llamacpp_vlm_register();
-
-  if (result != RAC_SUCCESS && result != RAC_ERROR_MODULE_ALREADY_REGISTERED) {
-    LOGe("Failed to register LlamaCPP VLM backend: %d", result);
-    return static_cast<jint>(result);
-  }
-
-  LOGi("LlamaCPP VLM backend registered successfully");
-  return RAC_SUCCESS;
-}
-
-/**
- * Unregister the LlamaCPP VLM backend from the C++ service registry.
- */
-JNIEXPORT jint JNICALL
-Java_com_runanywhere_sdk_llm_llamacpp_LlamaCPPBridge_nativeUnregisterVlm(
-    JNIEnv *env, jclass clazz) {
-  (void)env;
-  (void)clazz;
-  LOGi("LlamaCPP nativeUnregisterVlm called");
-
-  rac_result_t result = rac_backend_llamacpp_vlm_unregister();
-
-  if (result != RAC_SUCCESS) {
-    LOGe("Failed to unregister LlamaCPP VLM backend: %d", result);
-  } else {
-    LOGi("LlamaCPP VLM backend unregistered");
-  }
-
-  return static_cast<jint>(result);
-}
-
 // =============================================================================
 // LLM Operations - Direct API calls
 // =============================================================================
+// Note: VLM registration is no longer a separate JNI call. After the LLM/VLM
+// plugin unification, `rac_backend_llamacpp_register()` registers ONE plugin
+// vtable with both LLM and VLM ops filled. Kotlin's `LlamaCPP.register()`
+// calls `nativeRegister()` once; both modalities light up together.
 
 /**
  * Create a LlamaCPP instance and load a model