diff --git a/.github/build.bat b/.github/build.bat index d48f0957..a9df77f5 100755 --- a/.github/build.bat +++ b/.github/build.bat @@ -6,6 +6,11 @@ REM SPDX-License-Identifier: MIT @echo off setlocal enabledelayedexpansion +REM The core project (CMakeLists.txt + src\) lives in the `llama\` module of the Maven +REM reactor. Re-root here once (relative to this script's own location) so cmake +REM configures the module regardless of the caller's CWD. +cd /d "%~dp0..\llama" || exit /b 1 + REM --------------------------------------------------------------------------- REM Optional shared compiler cache: sccache fronting Depot Cache (WebDAV). REM Mirrors build.sh's sccache_can_wrap_compiler() probe. Because sccache *is* diff --git a/.github/build.sh b/.github/build.sh index 480e2009..7a47ab65 100755 --- a/.github/build.sh +++ b/.github/build.sh @@ -5,6 +5,14 @@ # # SPDX-License-Identifier: MIT +# The core project (CMakeLists.txt + src/) lives in the `llama/` module of the Maven +# reactor. Re-root here once — every native build delegates to this script (incl. the +# build_cuda_linux.sh / build_opencl_*.sh wrappers that `exec .github/build.sh`) — so +# cmake configures the module regardless of the caller's CWD. Anchored to this script's +# own location (via BASH_SOURCE), so it works from the repo root and inside the dockcross +# container whose workdir is the mounted repo root. +cd "$(dirname "${BASH_SOURCE[0]}")/../llama" || exit 1 + mkdir -p build # Build parallelism. Defaults to all cores; RAM-limited CI runners (notably GitHub's diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index fee925c6..623b6db5 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -31,8 +31,9 @@ jobs: - name: Check C++ formatting run: | clang-format --version - # All hand-written C++ sources; the generated JNI header (src/main/cpp/jllama.h, - # produced by `javac -h`) is intentionally excluded. 
- files=$(find src/main/cpp src/test/cpp -type f \( -name '*.cpp' -o -name '*.hpp' \) | sort) + # All hand-written C++ sources; the generated JNI header (llama/src/main/cpp/jllama.h, + # produced by `javac -h`) is intentionally excluded. Sources live in the `llama/` + # reactor module; clang-format finds llama/.clang-format by walking up from each file. + files=$(find llama/src/main/cpp llama/src/test/cpp -type f \( -name '*.cpp' -o -name '*.hpp' \) | sort) echo "Checking:"; echo "$files" clang-format --dry-run --Werror $files diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b4ec2ad5..0d4eb6ff 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -50,7 +50,7 @@ env: # at src/test/resources/images/test-image.jpg (see the README in that # directory for licensing). No download step is needed; CI just points # mvn test at the committed path. - VISION_IMAGE_PATH: "src/test/resources/images/test-image.jpg" + VISION_IMAGE_PATH: "llama/src/test/resources/images/test-image.jpg" permissions: contents: read jobs: @@ -133,25 +133,26 @@ jobs: java-version: '21' distribution: temurin - name: Spotless check (fail fast on format violations) - run: mvn -B --no-transfer-progress spotless:check + run: mvn -B --no-transfer-progress -f llama/pom.xml spotless:check - name: SpotBugs check (fail fast on static-analysis findings) - run: mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile spotbugs:check + run: mvn -B --no-transfer-progress -f llama/pom.xml -DskipTests -Denforcer.skip=true compile spotbugs:check - name: Print internal package dependency graph (jdeps, informational) continue-on-error: true run: | - mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile + mvn -B --no-transfer-progress -f llama/pom.xml -DskipTests -Denforcer.skip=true compile echo "=== internal package dependency graph (jdeps, bytecode) ===" - jdeps -verbose:package target/classes | grep 'net.ladenthin.llama' || true 
+ jdeps -verbose:package llama/target/classes | grep 'net.ladenthin.llama' || true # --------------------------------------------------------------------------- # Sibling module `llama-langchain4j` (LangChain4j adapters). Pure Java, no native # code and no per-classifier matrix: it compiles against the core's stable Java API # (identical across every classifier) and the backend is a runtime choice for the - # consumer. This job installs the core Java jar, guards that the module version is in - # lockstep with the core, then builds + tests the module (Java 17; langchain4j 1.x - # baseline). It runs its mapping unit tests; the model-backed integration test - # self-skips without a GGUF. `verify` also builds the javadoc/sources jars so a - # release-time javadoc break is caught here in PR CI. + # consumer. This job installs the parent + core into the local repo, then builds + tests + # the module (Java 17; langchain4j 1.x baseline). It runs its mapping unit tests; the + # model-backed integration test self-skips without a GGUF. `verify` also builds the + # javadoc/sources jars so a release-time javadoc break is caught here in PR CI. Version + # lockstep is now guaranteed by construction (both modules inherit the parent's version), + # so the old lockstep guard is gone. # --------------------------------------------------------------------------- test-java-llama-langchain4j: @@ -164,19 +165,9 @@ jobs: with: java-version: ${{ env.JAVA_VERSION }} distribution: temurin - - name: Version lockstep guard (module version must equal core version) - shell: bash - run: | - CORE=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version | tail -n1) - MOD=$(mvn -q -DforceStdout -f llama-langchain4j/pom.xml help:evaluate -Dexpression=project.version | tail -n1) - echo "core=$CORE module=$MOD" - if [ "$CORE" != "$MOD" ]; then - echo "::error::Version drift: core ($CORE) != llama-langchain4j ($MOD). Keep llama-langchain4j/pom.xml in lockstep with the root pom.xml." 
- exit 1 - fi - - name: Install core net.ladenthin:llama jar (Java only) + - name: Install parent + core net.ladenthin:llama into the local repo (Java only) run: > - mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true + mvn -B --no-transfer-progress -pl llama -am -DskipTests -Denforcer.skip=true -Dspotless.check.skip=true -Dspotbugs.skip=true -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Dgpg.skip=true install - name: Build and test llama-langchain4j @@ -202,7 +193,7 @@ jobs: uses: actions/download-artifact@v8 with: name: Linux-x86_64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ - name: Restore shared GGUF model cache (populated by download-models; no re-download) uses: actions/cache@v6 with: @@ -212,9 +203,9 @@ jobs: with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} - - name: Install core net.ladenthin:llama jar (bundles the downloaded native library) + - name: Install parent + core net.ladenthin:llama (bundles the downloaded native library) run: > - mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true + mvn -B --no-transfer-progress -pl llama -am -DskipTests -Denforcer.skip=true -Dspotless.check.skip=true -Dspotbugs.skip=true -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Dgpg.skip=true install - name: Run llama-langchain4j model-backed integration tests (reused cached models) @@ -242,7 +233,7 @@ jobs: id: tag shell: bash run: | - TAG=$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' CMakeLists.txt | grep -oE 'b[0-9]+' | head -1) + TAG=$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' llama/CMakeLists.txt | grep -oE 'b[0-9]+' | head -1) if [ -z "$TAG" ]; then echo "could not resolve llama.cpp GIT_TAG (b) from CMakeLists.txt" >&2 exit 1 @@ -284,14 +275,14 @@ jobs: done ) # llama-ui-embed is a self-contained C++17 host tool (no npm) — build + run it. 
g++ -O2 -std=c++17 -o llama-ui-embed embed.cpp - mkdir -p "$GITHUB_WORKSPACE/webui-generated" + mkdir -p "$GITHUB_WORKSPACE/llama/webui-generated" ./llama-ui-embed \ - "$GITHUB_WORKSPACE/webui-generated/ui.cpp" \ - "$GITHUB_WORKSPACE/webui-generated/ui.h" \ + "$GITHUB_WORKSPACE/llama/webui-generated/ui.cpp" \ + "$GITHUB_WORKSPACE/llama/webui-generated/ui.h" \ dist echo "=== generated WebUI assets ===" - ls -la "$GITHUB_WORKSPACE/webui-generated" - if grep -q LLAMA_UI_HAS_ASSETS "$GITHUB_WORKSPACE/webui-generated/ui.h"; then + ls -la "$GITHUB_WORKSPACE/llama/webui-generated" + if grep -q LLAMA_UI_HAS_ASSETS "$GITHUB_WORKSPACE/llama/webui-generated/ui.h"; then echo "LLAMA_UI_HAS_ASSETS: present (real WebUI embedded)" else echo "ERROR: embed produced an empty asset table" >&2 @@ -301,7 +292,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ retention-days: 1 if-no-files-found: error @@ -332,7 +323,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Display CPU Info shell: bash run: | @@ -349,7 +340,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: linux-libraries-cuda - path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_linux_cuda/net/ladenthin/llama/ crosscompile-linux-x86_64: name: Cross-Compile manylinux2014 x86_64 @@ -372,7 +363,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Display CPU Info shell: bash run: | @@ -389,7 +380,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: Linux-x86_64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: 
${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ crosscompile-linux-aarch64: name: Build and Test Linux aarch64 @@ -414,7 +405,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - uses: actions/setup-java@v5 with: distribution: 'temurin' @@ -436,15 +427,15 @@ jobs: - name: Build libraries shell: bash run: | - mvn --no-transfer-progress compile + mvn --no-transfer-progress -f llama/pom.xml compile .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=aarch64 -DGGML_NATIVE=OFF -DBUILD_TESTING=ON" - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: Linux-aarch64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ crosscompile-android-aarch64: name: Cross-Compile Android aarch64 @@ -464,7 +455,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Display CPU Info shell: bash run: | @@ -481,7 +472,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: Linux-Android-aarch64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ crosscompile-android-aarch64-opencl: name: Cross-Compile Android aarch64 (OpenCL/Adreno) @@ -502,7 +493,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Build libraries shell: bash run: | @@ -511,7 +502,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: android-libraries-opencl - path: 
${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_android_opencl/net/ladenthin/llama/ # --------------------------------------------------------------------------- # Native build jobs — produce release artifacts + run C++ unit tests @@ -532,7 +523,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - uses: actions/setup-java@v5 with: distribution: 'temurin' @@ -552,15 +543,15 @@ jobs: - name: Build libraries shell: bash run: | - mvn --no-transfer-progress compile + mvn --no-transfer-progress -f llama/pom.xml compile .github/build.sh -DLLAMA_METAL=OFF -DGGML_NATIVE=OFF -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: macos-15-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ build-macos-arm64-metal: name: Build and Test macOS 14 arm64 (Metal) @@ -577,7 +568,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - uses: actions/setup-java@v5 with: distribution: 'temurin' @@ -597,15 +588,15 @@ jobs: - name: Build libraries shell: bash run: | - mvn --no-transfer-progress compile + mvn --no-transfer-progress -f llama/pom.xml compile .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: macos-14-libraries - path: ${{ github.workspace 
}}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ build-windows-x86_64-msvc: name: Build and Test Windows 2025 x86_64 (MSVC / VS 2026, classifier) @@ -617,7 +608,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Display CPU Info shell: pwsh run: | @@ -634,12 +625,12 @@ jobs: run: | .github\build.bat -G "Visual Studio 18 2026" -A "x64" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: Windows-x86_64-msvc - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ build-windows-x86-msvc: name: Build and Test Windows 2025 x86 (MSVC / VS 2026, classifier) @@ -651,7 +642,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Display CPU Info shell: pwsh run: | @@ -668,12 +659,12 @@ jobs: run: | .github\build.bat -G "Visual Studio 18 2026" -A "Win32" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: Windows-x86-msvc - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # --------------------------------------------------------------------------- # Windows Ninja Multi-Config + sccache — the DEFAULT Windows CPU natives. 
@@ -703,7 +694,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Set up MSVC developer environment (x64) uses: ilammy/msvc-dev-cmd@v1 with: @@ -736,12 +727,12 @@ jobs: run: | .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: Windows-x86_64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ build-windows-x86: name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, default) @@ -757,7 +748,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Set up MSVC developer environment (x86) uses: ilammy/msvc-dev-cmd@v1 with: @@ -790,12 +781,12 @@ jobs: run: | .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: Windows-x86-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # --------------------------------------------------------------------------- # Windows GPU classifiers (x86_64 only) — CUDA, Vulkan, OpenCL. 
@@ -826,7 +817,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Set up MSVC developer environment (x64) uses: ilammy/msvc-dev-cmd@v1 with: @@ -864,7 +855,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: Windows-x86_64-cuda - path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_cuda/net/ladenthin/llama/ if-no-files-found: error build-windows-x86_64-vulkan: @@ -881,7 +872,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Set up MSVC developer environment (x64) uses: ilammy/msvc-dev-cmd@v1 with: @@ -913,7 +904,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: Windows-x86_64-vulkan - path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_vulkan/net/ladenthin/llama/ if-no-files-found: error build-windows-x86_64-opencl: @@ -930,7 +921,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - name: Set up MSVC developer environment (x64) uses: ilammy/msvc-dev-cmd@v1 with: @@ -957,7 +948,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: Windows-x86_64-opencl - path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_opencl/net/ladenthin/llama/ if-no-files-found: error # --------------------------------------------------------------------------- @@ -983,10 +974,10 @@ jobs: cat /proc/cpuinfo - name: Build libraries run: | - mvn -q --no-transfer-progress compile + mvn -q 
--no-transfer-progress -f llama/pom.xml compile .github/build.sh -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure build-macos-arm64-metal-15: name: Build and Test macOS 15 arm64 (Metal) @@ -1003,7 +994,7 @@ jobs: uses: actions/download-artifact@v8 with: name: webui-generated - path: ${{ github.workspace }}/webui-generated/ + path: ${{ github.workspace }}/llama/webui-generated/ - uses: actions/setup-java@v5 with: distribution: 'temurin' @@ -1023,15 +1014,15 @@ jobs: - name: Build libraries shell: bash run: | - mvn --no-transfer-progress compile + mvn --no-transfer-progress -f llama/pom.xml compile .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DGGML_NATIVE=OFF -DBUILD_TESTING=ON - name: Run C++ unit tests - run: ctest --test-dir build --output-on-failure + run: ctest --test-dir llama/build --output-on-failure - name: Upload artifacts uses: actions/upload-artifact@v7 with: name: macos-15-metal-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # --------------------------------------------------------------------------- # Java test jobs — download release artifact, run mvn test @@ -1054,7 +1045,7 @@ jobs: - uses: actions/download-artifact@v8 with: name: Linux-x86_64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # GGUF models are downloaded + cached ONCE by the upstream `download-models` job # (this job `needs:` it), so here we only RESTORE the shared cache — no per-job # download logic. 
GGUF is platform-independent, so ubuntu + macOS + Windows share @@ -1079,7 +1070,7 @@ jobs: echo "${{ github.workspace }}/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern - name: Run tests run: | - mvn -e --no-transfer-progress -P jcstress test \ + mvn -e --no-transfer-progress -f llama/pom.xml -P jcstress test \ -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \ -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \ -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \ @@ -1091,15 +1082,15 @@ jobs: if: success() with: name: jacoco-report - path: target/site/jacoco/jacoco.xml + path: llama/target/site/jacoco/jacoco.xml if-no-files-found: ignore - name: Run PIT mutation tests - run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage + run: mvn --batch-mode --no-transfer-progress -f llama/pom.xml test-compile org.pitest:pitest-maven:mutationCoverage - name: Extract PIT survivors if: always() run: | echo "=== PIT Survived Mutations ===" - for html_file in $(find target/pit-reports -name "*.html" -type f 2>/dev/null | sort); do + for html_file in $(find llama/target/pit-reports -name "*.html" -type f 2>/dev/null | sort); do if grep -q "SURVIVED" "$html_file"; then echo "Found survivors in $html_file:" grep -B 2 -A 3 "SURVIVED" "$html_file" @@ -1108,7 +1099,7 @@ jobs: done - uses: actions/upload-artifact@v7 if: always() - with: { name: pit-reports, path: target/pit-reports/ } + with: { name: pit-reports, path: llama/target/pit-reports/ } - name: Memory after tests if: always() run: free -h @@ -1146,13 +1137,13 @@ jobs: # -Dtest matches simple class names, not package globs; the default suite is # excluded from the vmlens package via pom.xml managed surefire ). 
run: >- - mvn --batch-mode --no-transfer-progress -Pvmlens test + mvn --batch-mode --no-transfer-progress -f llama/pom.xml -Pvmlens test -Dtest=VmlensInterleavingSmokeTest,SessionStateInterleavingTest -DfailIfNoTests=false - uses: actions/upload-artifact@v7 if: always() with: name: vmlens-report - path: target/vmlens-report/ + path: llama/target/vmlens-report/ if-no-files-found: ignore test-java-macos-arm64-metal: @@ -1172,7 +1163,7 @@ jobs: - uses: actions/download-artifact@v8 with: name: macos-14-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # GGUF models are downloaded + cached ONCE by the upstream `download-models` job # (this job `needs:` it), so here we only RESTORE the shared cache — no per-job # download logic. GGUF is platform-independent, so ubuntu + macOS + Windows share @@ -1195,7 +1186,7 @@ jobs: run: ulimit -c unlimited - name: Run tests run: | - mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \ + mvn -e --no-transfer-progress -f llama/pom.xml -Dnet.ladenthin.llama.test.ngl=0 test \ -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \ -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \ -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \ @@ -1236,7 +1227,7 @@ jobs: - uses: actions/download-artifact@v8 with: name: macos-15-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # GGUF models are downloaded + cached ONCE by the upstream `download-models` job # (this job `needs:` it), so here we only RESTORE the shared cache — no per-job # download logic. 
GGUF is platform-independent, so ubuntu + macOS + Windows share @@ -1259,7 +1250,7 @@ jobs: run: ulimit -c unlimited - name: Run tests run: | - mvn -e --no-transfer-progress test \ + mvn -e --no-transfer-progress -f llama/pom.xml test \ -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \ -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \ -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \ @@ -1300,7 +1291,7 @@ jobs: - uses: actions/download-artifact@v8 with: name: macos-15-metal-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # GGUF models are downloaded + cached ONCE by the upstream `download-models` job # (this job `needs:` it), so here we only RESTORE the shared cache — no per-job # download logic. GGUF is platform-independent, so ubuntu + macOS + Windows share @@ -1323,7 +1314,7 @@ jobs: run: ulimit -c unlimited - name: Run tests run: | - mvn -e --no-transfer-progress test \ + mvn -e --no-transfer-progress -f llama/pom.xml test \ -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \ -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \ -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \ @@ -1367,7 +1358,7 @@ jobs: - uses: actions/download-artifact@v8 with: name: Windows-x86_64-libraries - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # GGUF models are downloaded + cached ONCE by the upstream `download-models` job # (this job `needs:` it), so here we only RESTORE the shared cache — no per-job # download logic. 
validate-models is kept as an integrity guard so a partial/absent @@ -1405,7 +1396,7 @@ jobs: Get-ItemProperty -Path $key | Format-List - name: Run tests run: | - mvn -e --no-transfer-progress test ` + mvn -e --no-transfer-progress -f llama/pom.xml test ` "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" ` "-Dnet.ladenthin.llama.nomic.path=models/$env:NOMIC_EMBED_MODEL_NAME" ` "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" ` @@ -1429,7 +1420,7 @@ jobs: ${{ github.workspace }}\target\surefire-reports\*.dumpstream ${{ github.workspace }}\target\surefire-reports\*.txt ${{ github.workspace }}\target\surefire-reports\TEST-*.xml - ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/* + ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/**/* if-no-files-found: warn # Java/inference validation of the MSVC-built x86_64 DLL (the analogue of @@ -1456,7 +1447,7 @@ jobs: - uses: actions/download-artifact@v8 with: name: Windows-x86_64-msvc - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ # GGUF models are downloaded + cached ONCE by the upstream `download-models` job # (this job `needs:` it), so here we only RESTORE the shared cache — no per-job # download logic. 
validate-models is kept as an integrity guard so a partial/absent @@ -1494,7 +1485,7 @@ jobs: Get-ItemProperty -Path $key | Format-List - name: Run tests run: | - mvn -e --no-transfer-progress test ` + mvn -e --no-transfer-progress -f llama/pom.xml test ` "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" ` "-Dnet.ladenthin.llama.nomic.path=models/$env:NOMIC_EMBED_MODEL_NAME" ` "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" ` @@ -1518,7 +1509,7 @@ jobs: ${{ github.workspace }}\target\surefire-reports\*.dumpstream ${{ github.workspace }}\target\surefire-reports\*.txt ${{ github.workspace }}\target\surefire-reports\TEST-*.xml - ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/* + ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/**/* if-no-files-found: warn # --------------------------------------------------------------------------- @@ -1554,38 +1545,38 @@ jobs: with: pattern: "*-libraries" merge-multiple: true - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: linux-libraries-cuda - path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_linux_cuda/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: android-libraries-opencl - path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_android_opencl/net/ladenthin/llama/ # MSVC-built Windows natives -> `msvc-windows` classifier tree. The default JAR # now ships the Ninja `*-libraries` natives merged above (default flip). 
- uses: actions/download-artifact@v8 with: name: Windows-x86_64-msvc - path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_msvc/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86-msvc - path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_msvc/net/ladenthin/llama/ # Windows GPU classifiers (x86_64 only) -> one tree each. - uses: actions/download-artifact@v8 with: name: Windows-x86_64-cuda - path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_cuda/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-vulkan - path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_vulkan/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-opencl - path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_opencl/net/ladenthin/llama/ - uses: actions/setup-java@v5 with: distribution: 'temurin' @@ -1604,7 +1595,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: llama-jars - path: target/*.jar + path: llama/target/*.jar report: name: Report @@ -1668,35 +1659,35 @@ jobs: with: pattern: "*-libraries" merge-multiple: true - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: linux-libraries-cuda - path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_linux_cuda/net/ladenthin/llama/ - uses: 
actions/download-artifact@v8 with: name: android-libraries-opencl - path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_android_opencl/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-msvc - path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_msvc/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86-msvc - path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_msvc/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-cuda - path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_cuda/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-vulkan - path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_vulkan/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-opencl - path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_opencl/net/ladenthin/llama/ - name: Set up Maven Central Repository uses: actions/setup-java@v5 with: @@ -1716,27 +1707,21 @@ jobs: *-SNAPSHOT) echo "OK: -SNAPSHOT version, continuing snapshot deploy." ;; *) echo "::error::Refusing to publish non-SNAPSHOT version '$VERSION' from the snapshot job. 
Snapshot publishing requires a -SNAPSHOT version; releases go through the v* tag path."; exit 1 ;; esac - - name: Publish snapshot + # One reactor deploy publishes all three artifacts at the same version: + # net.ladenthin:llama-parent (the pom), :llama (the core jar + classifiers), and + # :llama-langchain4j. The `release` profile (GPG + Central Publishing) is inherited + # from the parent, so every module — including the parent pom — is signed. + - name: Publish snapshot (reactor - parent + llama + llama-langchain4j) run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows -Dmaven.test.skip=true deploy env: MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }} MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - # Deploy the sibling llama-langchain4j at the same version. The core `deploy` - # above ran through `install`, so net.ladenthin:llama is in the local repo for - # the module to resolve. Standalone module (not in the reactor), so it is a - # separate deploy invocation. 
- - name: Publish snapshot (llama-langchain4j) - run: mvn --batch-mode --no-transfer-progress -f llama-langchain4j/pom.xml -P release -Dmaven.test.skip=true deploy - env: - MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} - MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }} - MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - name: Collect signed artifacts run: | mkdir -p signed-snapshot-assets - cp target/*.jar signed-snapshot-assets/ 2>/dev/null || true - cp target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true + cp llama/target/*.jar signed-snapshot-assets/ 2>/dev/null || true + cp llama/target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true cp llama-langchain4j/target/*.jar signed-snapshot-assets/ 2>/dev/null || true cp llama-langchain4j/target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true - uses: actions/upload-artifact@v7 @@ -1784,35 +1769,35 @@ jobs: with: pattern: "*-libraries" merge-multiple: true - path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: linux-libraries-cuda - path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_linux_cuda/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: android-libraries-opencl - path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_android_opencl/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-msvc - path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_msvc/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86-msvc - path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + path: ${{ 
github.workspace }}/llama/src/main/resources_windows_msvc/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-cuda - path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_cuda/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-vulkan - path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_vulkan/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: name: Windows-x86_64-opencl - path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ + path: ${{ github.workspace }}/llama/src/main/resources_windows_opencl/net/ladenthin/llama/ - name: Set up Maven Central Repository uses: actions/setup-java@v5 with: @@ -1823,27 +1808,21 @@ jobs: server-password: MAVEN_PASSWORD gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} gpg-passphrase: MAVEN_GPG_PASSPHRASE - - name: Publish release + # One reactor deploy publishes all three artifacts at the same version: + # net.ladenthin:llama-parent (the pom), :llama (the core jar + classifiers), and + # :llama-langchain4j. The `release` profile (GPG + Central Publishing) is inherited + # from the parent, so every module — including the parent pom — is signed. + - name: Publish release (reactor - parent + llama + llama-langchain4j) run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows -Dmaven.test.skip=true deploy env: MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }} MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - # Deploy the sibling llama-langchain4j at the same version. The core `deploy` - # above ran through `install`, so net.ladenthin:llama is in the local repo for - # the module to resolve. 
Standalone module (not in the reactor), so it is a - # separate deploy invocation. - - name: Publish release (llama-langchain4j) - run: mvn --batch-mode --no-transfer-progress -f llama-langchain4j/pom.xml -P release -Dmaven.test.skip=true deploy - env: - MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} - MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }} - MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - name: Collect signed artifacts run: | mkdir -p signed-release-assets - cp target/*.jar signed-release-assets/ 2>/dev/null || true - cp target/*.jar.asc signed-release-assets/ 2>/dev/null || true + cp llama/target/*.jar signed-release-assets/ 2>/dev/null || true + cp llama/target/*.jar.asc signed-release-assets/ 2>/dev/null || true cp llama-langchain4j/target/*.jar signed-release-assets/ 2>/dev/null || true cp llama-langchain4j/target/*.jar.asc signed-release-assets/ 2>/dev/null || true - uses: actions/upload-artifact@v7 diff --git a/.github/workflows/sonarqube.yml b/.github/workflows/sonarqube.yml index b6a4808b..2ee66a0a 100644 --- a/.github/workflows/sonarqube.yml +++ b/.github/workflows/sonarqube.yml @@ -37,4 +37,4 @@ jobs: - name: Build and analyze env: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - run: mvn -B --no-transfer-progress verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=bernardladenthin_java-llama.cpp + run: mvn -B --no-transfer-progress -f llama/pom.xml verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=bernardladenthin_java-llama.cpp diff --git a/.gitignore b/.gitignore index be02ca4b..ba0a69ed 100644 --- a/.gitignore +++ b/.gitignore @@ -38,28 +38,28 @@ hs_err_pid* replay_pid* models/*.gguf -src/main/cpp/net_ladenthin_llama_*.h -src/main/resources_cuda_linux/ +llama/src/main/cpp/net_ladenthin_llama_*.h +llama/src/main/resources_cuda_linux/ # Per-classifier native trees, staged by CI before the matching Maven profile runs, # never committed (same policy as the default-tree native libs below). 
-src/main/resources_windows_msvc/ -src/main/resources_windows_cuda/ -src/main/resources_windows_vulkan/ -src/main/resources_windows_opencl/ -src/main/resources/**/*.so -src/main/resources/**/*.dylib -src/main/resources/**/*.dll -src/main/resources/**/*.metal -src/test/resources/**/*.gbnf +llama/src/main/resources_windows_msvc/ +llama/src/main/resources_windows_cuda/ +llama/src/main/resources_windows_vulkan/ +llama/src/main/resources_windows_opencl/ +llama/src/main/resources/**/*.so +llama/src/main/resources/**/*.dylib +llama/src/main/resources/**/*.dll +llama/src/main/resources/**/*.metal +llama/src/test/resources/**/*.gbnf # Generated WebUI assets (ui.cpp/ui.h) produced once by the build-webui CI job and # downloaded into every native build; embedded into libjllama, never committed # (this repo commits no build outputs — same policy as the native libs above). -/webui-generated/ +/llama/webui-generated/ **/*.etag **/*.lastModified -src/main/cpp/llama.cpp/ +llama/src/main/cpp/llama.cpp/ # jcstress / jqwik test outputs (generated in repo root) /.jqwik-database diff --git a/CLAUDE.md b/CLAUDE.md index db03010f..86d74121 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,7 +16,7 @@ To change the CUDA version, update the following **three** places: 1. **`.github/build_cuda_linux.sh`** — Line 10: `sudo dnf install -y cuda-toolkit-13-2` 2. **`.github/build_cuda_linux.sh`** — Line 12: `-DCMAKE_CUDA_COMPILER=/usr/local/cuda-13.2/bin/nvcc` -3. **`pom.xml`** — The `` tag in the `cuda` jar execution: `cuda13-linux-x86-64` +3. **`llama/pom.xml`** — The `` tag in the `cuda` jar execution: `cuda13-linux-x86-64` Also update the header comment in `build_cuda_linux.sh` and the job name in `.github/workflows/release.yaml` for clarity. 
@@ -32,9 +32,9 @@ Example: To upgrade from 13.2 to a hypothetical 13.3: # Edit .github/build_cuda_linux.sh: # line 10: cuda-toolkit-13-2 -> cuda-toolkit-13-3 # line 12: /usr/local/cuda-13.2/bin/nvcc -> /usr/local/cuda-13.3/bin/nvcc -# Edit pom.xml classifier: cuda13-linux-x86-64 (major version only, no need to change for minor bumps) +# Edit llama/pom.xml classifier: cuda13-linux-x86-64 (major version only, no need to change for minor bumps) # Edit CLAUDE.md line: Current CUDA version: **13.2** -> **13.3** -git add .github/build_cuda_linux.sh pom.xml CLAUDE.md +git add .github/build_cuda_linux.sh llama/pom.xml CLAUDE.md git commit -m "Upgrade CUDA from 13.2 to 13.3" ``` @@ -88,7 +88,7 @@ This is enforced through bionic's **weak-symbol** mechanism, *not* by bumping `__ANDROID_API__` or passing `-DANDROID_PLATFORM`. See "How the API gate is satisfied" below for why. To change anything here, update: -1. **`CMakeLists.txt`** — the `add_compile_definitions(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)` +1. **`llama/CMakeLists.txt`** — the `add_compile_definitions(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)` block and its Android-detection guard (`OS_NAME MATCHES "Android"` etc.). 2. **`CLAUDE.md`** (this file) — the "Current Android minimum API level" line above. 3. **`README.md`** — the minimum-API note (the `[!NOTE]` block near the Android @@ -134,7 +134,7 @@ The default Android arm64 JAR remains CPU-only. Three places wire it together (mirrors the CUDA classifier pattern): -1. **`CMakeLists.txt`** — `elseif(GGML_OPENCL)` branch routes artifacts to +1. **`llama/CMakeLists.txt`** — `elseif(GGML_OPENCL)` branch routes artifacts to `src/main/resources_android_opencl/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}/`. 2. **`.github/workflows/publish.yml`** — `crosscompile-android-aarch64-opencl` job runs the dockcross-android-arm64 build with @@ -142,7 +142,7 @@ Three places wire it together (mirrors the CUDA classifier pattern): and uploads as artifact `android-libraries-opencl`. 
The `package`, `publish-snapshot`, and `publish-release` jobs download it into `resources_android_opencl/` and activate the `opencl-android` Maven profile. -3. **`pom.xml`** — the `opencl-android` profile produces a second JAR with +3. **`llama/pom.xml`** — the `opencl-android` profile produces a second JAR with `opencl-android-aarch64` from the `${project.build.outputDirectory}_opencl_android` tree. @@ -196,7 +196,7 @@ local / self-hosted. Wiring (mirrors the CUDA-Linux / OpenCL-Android classifier pattern): -1. **`CMakeLists.txt`** — the `if(GGML_CUDA) … elseif(GGML_VULKAN) … elseif(GGML_OPENCL) … else()` +1. **`llama/CMakeLists.txt`** — the `if(GGML_CUDA) … elseif(GGML_VULKAN) … elseif(GGML_OPENCL) … else()` chain is **OS-aware**: CUDA → `resources_windows_cuda` on Windows (else `resources_linux_cuda`), Vulkan → `resources_windows_vulkan`, OpenCL → `resources_windows_opencl` on Windows (else `resources_android_opencl`). The default CPU build (both generators) still emits to the canonical @@ -225,7 +225,7 @@ Wiring (mirrors the CUDA-Linux / OpenCL-Android classifier pattern): The `package`, `publish-snapshot`, and `publish-release` jobs download each non-default artifact into its `src/main/resources_windows_{msvc,cuda,vulkan,opencl}/` tree and activate the `windows-msvc,cuda-windows,vulkan-windows,opencl-windows` Maven profiles. -5. **`pom.xml`** — profiles `windows-msvc` / `cuda-windows` / `vulkan-windows` / `opencl-windows`, +5. **`llama/pom.xml`** — profiles `windows-msvc` / `cuda-windows` / `vulkan-windows` / `opencl-windows`, each a separate compile pass + resource copy + classified jar (classifiers `msvc-windows` / `cuda13-windows-x86-64` / `vulkan-windows-x86-64` / `opencl-windows-x86-64`). Activated only in CI. 6. **`README.md`** — the classifier table + dependency snippets in "Choosing the right classifier". @@ -263,7 +263,7 @@ checked in (same policy as the native libs). Pipeline (`.github/workflows/publish.yml`): 1. 
**`build-webui` job** (ubuntu — the *only* job that runs `npm`): resolves the - pinned `b` tag from `CMakeLists.txt`'s `GIT_TAG`, sparse-checks-out + pinned `b` tag from `llama/CMakeLists.txt`'s `GIT_TAG`, sparse-checks-out `ggml-org/llama.cpp@` `tools/ui`, runs the upstream Svelte build (`npm ci && npm run build`), gzips `dist/` into `dist/_gzip/` (LLAMA_UI_GZIP parity), builds the self-contained `llama-ui-embed` host tool (plain C++17, **no @@ -472,16 +472,16 @@ re-verify the generator the same way you re-verify `patches/`. To change the llama.cpp version, update the following **three** files (and re-verify `patches/`): -1. **CMakeLists.txt** — the `GIT_TAG` line for llama.cpp: `GIT_TAG b8831` +1. **llama/CMakeLists.txt** — the `GIT_TAG` line for llama.cpp: `GIT_TAG b8831` 2. **README.md** — the badge and link line with the version number 3. **CLAUDE.md** — the "Current llama.cpp pinned version" line Example: To upgrade from b8808 to b8831: ```bash -# Edit CMakeLists.txt: change GIT_TAG b8808 to b8831 +# Edit llama/CMakeLists.txt: change GIT_TAG b8808 to b8831 # Edit README.md: change b8808 to b8831 (in both badge and link) # Edit CLAUDE.md: change b8808 to b8831 -git add CMakeLists.txt README.md CLAUDE.md +git add llama/CMakeLists.txt README.md CLAUDE.md git commit -m "Upgrade llama.cpp from b8808 to b8831" git push -u origin ``` @@ -1225,17 +1225,46 @@ keeping it clear of the JPMS module-mode javadoc trap that bit BAF. **Before rai javadoc source level to ≥ 9, read** [`../workspace/policies/jpms-module-descriptor.md`](../workspace/policies/jpms-module-descriptor.md). -## LangChain4j integration (`llama-langchain4j` sibling module) +## Repository layout — Maven reactor (`llama/` + `llama-langchain4j/`) + +The repo root is a thin **aggregator/parent POM** (`net.ladenthin:llama-parent`, +`packaging=pom`) with two modules: + +- **`llama/`** — the native JNI core (`net.ladenthin:llama`). 
*All the core sources and build + files live here now:* `llama/src/`, `llama/CMakeLists.txt`, `llama/cmake/`, `llama/patches/`, + `llama/pom.xml`, `llama/spotbugs-exclude.xml`, `llama/lombok.config`, `llama/.clang-format`. + Its published coordinates are unchanged (`net.ladenthin:llama`), so consumers are unaffected. +- **`llama-langchain4j/`** — the LangChain4j adapters (see below). + +Both modules inherit the single `<version>` from the parent, so they **ship in lockstep by +construction** (no CI guard needed). The parent also holds the shared `release` profile (GPG + +Central Publishing), so one reactor `mvn -P release deploy` signs and publishes all three +artifacts (`llama-parent` pom, `llama`, `llama-langchain4j`) at the same version. + +**Consequences for build commands:** the core's cmake/native build runs *in `llama/`*. +`.github/build.sh` / `build.bat` `cd` into `llama/` themselves (relative to the script), so CI +and the dockcross containers (whose workdir stays the repo root) are unaffected. Locally, run +core cmake builds from `llama/` (e.g. `cd llama && cmake -B build && cmake --build build`), and +target the core with Maven via `-f llama/pom.xml` (or `-pl llama -am` from the root). A plain +`mvn` at the root builds the whole reactor. **When a build-command example elsewhere in this +file shows `cmake -B build` / `src/main/...` / `mvn compile` at the root, read it as running in +`llama/`** (the paths moved; the recipes are otherwise unchanged). + +**Version bump:** change the `<version>` in the **root** `pom.xml`; `llama` and +`llama-langchain4j` inherit it, but each child's `<parent>` block pins that same version and must be bumped in the same commit (`mvn versions:set` at the root updates all three POMs). (The SNAPSHOT/`-SNAPSHOT` line and the README badge still need +the usual manual update.) + +## LangChain4j integration (`llama-langchain4j` reactor module) `llama-langchain4j/` adapts a `LlamaModel` to LangChain4j's `ChatModel`, `StreamingChatModel`, `EmbeddingModel` and `ScoringModel` interfaces **in-process over -JNI** (no HTTP hop).
It is a **standalone sibling module**, deliberately *not* in the root -reactor, so the native build/release pipeline is untouched. +JNI** (no HTTP hop). It is a **reactor module** alongside the core `llama` module (see +"Repository layout" above), so it is built, versioned and released together with the core. Why it is a **separate artifact** and not a classifier of the core: langchain4j 1.x requires **Java 17** (the core stays Java 8), and classifiers share the core's single POM — adding `langchain4j-core` there would force it (and the Java 17 floor) on every plain -`net.ladenthin:llama` consumer. A separate `artifactId` with its own POM is the only way to +`net.ladenthin:llama` consumer. A separate `artifactId` (its own module POM) is the only way to keep that dependency (and Java floor) off the core. It is pure Java with **no per-classifier matrix**: it compiles against the core's Java API, which is identical across every native classifier; the backend (CPU/CUDA/OpenCL/Vulkan) is a runtime classpath choice for the @@ -1243,33 +1272,29 @@ consumer. Wiring: -1. **`llama-langchain4j/pom.xml`** — `net.ladenthin:llama-langchain4j`, `release 17`, - depends on `net.ladenthin:llama:${project.version}` (so the core dep always matches the - module's own version) and `dev.langchain4j:langchain4j-core`. Carries its own - sources/javadoc/gpg + `release` profile (Central requires per-artifact signing; the module - has no parent to inherit them from — plugin versions are pinned in lockstep with the root - `pom.xml`). Java package stays `net.ladenthin.llama.langchain4j` (package name need not track - the artifactId). -2. 
**`.github/workflows/publish.yml`** — the `test-java-llama-langchain4j` job installs the - core Java jar, runs a **version-lockstep guard** (module version must equal core version, - else the build fails — the standalone module can't inherit `${project.version}` from a - reactor), then `mvn -f llama-langchain4j/pom.xml verify` (7 model-free mapping unit tests - run; the 4 model-backed integration tests self-skip without a GGUF; `verify` also builds the - javadoc jar so a release-time javadoc break is caught in PR CI). The - `publish-snapshot`/`publish-release` jobs `needs:` this job and, after the core `deploy` - (which installs the core jar locally), run a second `deploy` of the module at the same - version. A separate **`test-java-llama-langchain4j-integration`** job runs the model-backed - tests (chat/streaming/embedding/scoring adapters) by **reusing** the shared GGUF cache +1. **`llama-langchain4j/pom.xml`** — `net.ladenthin:llama-langchain4j`, `release 17`, a child of + `net.ladenthin:llama-parent` (so it **inherits `${project.version}`** — no hardcoded version, + no lockstep guard). Depends on `net.ladenthin:llama:${project.version}` and + `dev.langchain4j:langchain4j-core`. Builds its own sources/javadoc jars; the `release` + profile (GPG + Central Publishing) is **inherited from the parent**, not duplicated here. + Java package stays `net.ladenthin.llama.langchain4j` (package name need not track the artifactId). +2. **`.github/workflows/publish.yml`** — the `test-java-llama-langchain4j` job installs + parent + core into the local repo (`mvn -pl llama -am -DskipTests install`), then + `mvn -f llama-langchain4j/pom.xml verify` (7 model-free mapping unit tests run; the 4 + model-backed integration tests self-skip without a GGUF; `verify` also builds the javadoc + jar so a release-time javadoc break is caught in PR CI). 
The `publish-snapshot`/ + `publish-release` jobs `needs:` this job; deployment is a **single reactor** + `mvn -P release deploy` (no separate module deploy step — the parent's inherited `release` + profile signs and publishes parent + llama + llama-langchain4j together at the same version). + A separate **`test-java-llama-langchain4j-integration`** job runs the model-backed tests + (chat/streaming/embedding/scoring adapters) by **reusing** the shared GGUF cache (`gguf-models-v1`, restore-only — no extra download) and the `Linux-x86_64-libraries` native artifact: it `needs: [crosscompile-linux-x86_64, download-models]` (so the cache is already - populated and it runs in parallel), installs the core jar with the downloaded native lib - bundled, and passes the already-cached chat - (`REASONING_MODEL_NAME`), nomic-embedding and jina-reranker model paths via the module's + populated and it runs in parallel), installs parent+core with the downloaded native lib + bundled, and passes the already-cached chat (`REASONING_MODEL_NAME`), nomic-embedding and + jina-reranker model paths via the module's `-Dnet.ladenthin.llama.langchain4j.{embedding,rerank}.model` / `net.ladenthin.llama.model.path` properties. It is validation-only (not a release gate); a cold cache degrades to a self-skip. -3. **Version bumps** — when the root `pom.xml` `` changes, bump - `llama-langchain4j/pom.xml` `` to match in the same commit, or the lockstep guard - reds CI. 
**Open follow-ups** (documented in `llama-langchain4j/README.md`): tool calling (`ToolSpecification` ↔ jllama `ToolDefinition`), `response_format`/JSON mode, and multimodal diff --git a/REUSE.toml b/REUSE.toml index df4c5422..55720a91 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -19,7 +19,7 @@ path = [ "docs/history/49be664_open_issues_comments.md", "docs/history/CHAT_INTEGRATION_SUMMARY.md", "docs/history/REFACTORING.md", - "src/test/resources/images/README.md", + "llama/src/test/resources/images/README.md", ".github/PULL_REQUEST_TEMPLATE.md", ".github/ISSUE_TEMPLATE/bug_report.md", ".github/ISSUE_TEMPLATE/feature_request.md", @@ -43,7 +43,7 @@ SPDX-License-Identifier = "MIT" # CMakeLists.txt (CMake format not recognized by reuse annotate) [[annotations]] -path = "CMakeLists.txt" +path = "llama/CMakeLists.txt" SPDX-FileCopyrightText = [ "2023-2025 Konstantin Herud", "2026 Bernard Ladenthin ", @@ -79,13 +79,13 @@ SPDX-License-Identifier = "MIT" # diff). Glob covers every current and future patch dropped into patches/ — see CLAUDE.md # "Local llama.cpp source patches". 
[[annotations]] -path = "patches/**" +path = "llama/patches/**" SPDX-FileCopyrightText = "2026 Bernard Ladenthin " SPDX-License-Identifier = "MIT" # Test image (binary, cannot carry inline SPDX) [[annotations]] -path = "src/test/resources/images/test-image.jpg" +path = "llama/src/test/resources/images/test-image.jpg" SPDX-FileCopyrightText = "2026 Bernard Ladenthin " SPDX-License-Identifier = "MIT" diff --git a/llama-langchain4j/pom.xml b/llama-langchain4j/pom.xml index ecb0b8bb..dabee5b2 100644 --- a/llama-langchain4j/pom.xml +++ b/llama-langchain4j/pom.xml @@ -9,9 +9,14 @@ SPDX-License-Identifier: MIT xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - net.ladenthin + + net.ladenthin + llama-parent + 5.0.4-SNAPSHOT + ../pom.xml + + llama-langchain4j - 5.0.4-SNAPSHOT jar ${project.groupId}:${project.artifactId} @@ -61,8 +66,6 @@ SPDX-License-Identifier: MIT 3.5.6 3.4.0 3.12.0 - 3.2.8 - 0.11.0 @@ -147,47 +150,9 @@ SPDX-License-Identifier: MIT - - - - release - - - - org.apache.maven.plugins - maven-gpg-plugin - ${gpg.plugin.version} - - - sign-artifacts - verify - sign - - ${gpg.keyname} - - --pinentry-mode - loopback - - - - - - - - org.sonatype.central - central-publishing-maven-plugin - ${central.plugin.version} - true - - central - true - published - - - - - - + diff --git a/.clang-format b/llama/.clang-format similarity index 100% rename from .clang-format rename to llama/.clang-format diff --git a/.clang-tidy b/llama/.clang-tidy similarity index 100% rename from .clang-tidy rename to llama/.clang-tidy diff --git a/CMakeLists.txt b/llama/CMakeLists.txt similarity index 98% rename from CMakeLists.txt rename to llama/CMakeLists.txt index b687c1c1..51bebdfd 100644 --- a/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -277,10 +277,13 @@ endif() # include jni.h and jni_md.h if(NOT DEFINED JNI_INCLUDE_DIRS) + # The bundled JNI headers live in .github/include/ at the REPO ROOT, one level above + # this module (the 
core moved into llama/). Anchor to the parent of the CMake source + # dir so the path is correct regardless of the build CWD. if(OS_NAME MATCHES "^Linux" OR OS_NAME STREQUAL "Mac" OR OS_NAME STREQUAL "Darwin") - set(JNI_INCLUDE_DIRS .github/include/unix) + set(JNI_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../.github/include/unix) elseif(OS_NAME STREQUAL "Windows") - set(JNI_INCLUDE_DIRS .github/include/windows) + set(JNI_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../.github/include/windows) # if we don't have provided headers, try to find them via Java else() find_package(Java REQUIRED) diff --git a/cmake/apply-llama-patches.cmake b/llama/cmake/apply-llama-patches.cmake similarity index 100% rename from cmake/apply-llama-patches.cmake rename to llama/cmake/apply-llama-patches.cmake diff --git a/cmake/generate-tts-upstream.cmake b/llama/cmake/generate-tts-upstream.cmake similarity index 100% rename from cmake/generate-tts-upstream.cmake rename to llama/cmake/generate-tts-upstream.cmake diff --git a/lombok.config b/llama/lombok.config similarity index 100% rename from lombok.config rename to llama/lombok.config diff --git a/patches/0001-win32-arg-parse-embed-guard.patch b/llama/patches/0001-win32-arg-parse-embed-guard.patch similarity index 100% rename from patches/0001-win32-arg-parse-embed-guard.patch rename to llama/patches/0001-win32-arg-parse-embed-guard.patch diff --git a/patches/0002-server-preserve-caller-load-progress-callback.patch b/llama/patches/0002-server-preserve-caller-load-progress-callback.patch similarity index 100% rename from patches/0002-server-preserve-caller-load-progress-callback.patch rename to llama/patches/0002-server-preserve-caller-load-progress-callback.patch diff --git a/patches/0003-pr22393-server-add-slot-prompt-similarity-getter-setter.patch b/llama/patches/0003-pr22393-server-add-slot-prompt-similarity-getter-setter.patch similarity index 100% rename from patches/0003-pr22393-server-add-slot-prompt-similarity-getter-setter.patch 
rename to llama/patches/0003-pr22393-server-add-slot-prompt-similarity-getter-setter.patch diff --git a/patches/0004-pr23116-server-per-request-reasoning-budget-tokens.patch b/llama/patches/0004-pr23116-server-per-request-reasoning-budget-tokens.patch similarity index 100% rename from patches/0004-pr23116-server-per-request-reasoning-budget-tokens.patch rename to llama/patches/0004-pr23116-server-per-request-reasoning-budget-tokens.patch diff --git a/llama/pom.xml b/llama/pom.xml new file mode 100644 index 00000000..73b91a09 --- /dev/null +++ b/llama/pom.xml @@ -0,0 +1,1323 @@ + + + + 4.0.0 + + + net.ladenthin + llama-parent + 5.0.4-SNAPSHOT + ../pom.xml + + + llama + jar + + ${project.groupId}:${project.artifactId} + Java Bindings for llama.cpp - A Port of Facebook's LLaMA model + in C/C++. + https://github.com/bernardladenthin/java-llama.cpp + + + + MIT License + https://www.opensource.org/licenses/mit-license.php + repo + + + + + + Bernard Ladenthin + https://github.com/bernardladenthin + + + + + scm:git:https://github.com/bernardladenthin/java-llama.cpp.git + scm:git:https://github.com/bernardladenthin/java-llama.cpp.git + https://github.com/bernardladenthin/java-llama.cpp/tree/main + + + + + central + https://central.sonatype.com/repository/maven-snapshots/ + + + + + bernardladenthin + 5.19.0 + 1.0.0 + 1.18.46 + 2.50.0 + 0.13.7 + 4.2.0 + 2.22.0 + 3.8.6 + 2.0.18 + 1.5.37 + 1.27 + 6.1.1 + 3.0 + 1.37 + 0.16 + 3.6 + 2.12.6 + 1.2.28 + + 1.9.3 + 1.4.2 + 4.10.2.0 + 7.7.4 + 1.14.0 + 3.7.0 + 2.94.0 + UTF-8 + ${git.commit.time} + + + + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + + + + org.projectlombok + lombok + ${lombok.version} + provided + + + org.junit.jupiter + junit-jupiter + ${junit.version} + test + + + org.hamcrest + hamcrest + ${hamcrest.version} + test + + + net.jqwik + jqwik + ${jqwik.version} + test + + + com.tngtech.archunit + archunit-junit5 + ${archunit.version} + test + + + org.jspecify + jspecify + ${jspecify.version} + + + 
org.checkerframework + checker-qual + ${checker.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + ch.qos.logback + logback-classic + ${logback.version} + runtime + + + + org.codehaus.mojo + animal-sniffer-annotations + ${animal-sniffer.version} + provided + + + org.openjdk.jmh + jmh-core + ${jmh.version} + test + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + test + + + org.openjdk.jcstress + jcstress-core + ${jcstress.version} + test + + + org.jetbrains.lincheck + lincheck + ${lincheck.version} + test + + + + io.github.hakky54 + logcaptor + ${logcaptor.version} + test + + + + io.projectreactor + reactor-core + ${reactor.version} + test + + + io.projectreactor + reactor-test + ${reactor.version} + test + + + + com.vmlens + api + ${vmlens.version} + test + + + + + + + + com.diffplug.spotless + spotless-maven-plugin + ${spotless.version} + + + com.github.spotbugs + spotbugs-maven-plugin + ${spotbugs.version} + + + com.vmlens + vmlens-maven-plugin + ${vmlens.version} + + + io.github.git-commit-id + git-commit-id-maven-plugin + 10.0.0 + + + org.apache.maven.plugins + maven-assembly-plugin + 3.8.0 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.15.0 + + + org.apache.maven.plugins + maven-gpg-plugin + 3.2.8 + + + org.apache.maven.plugins + maven-jar-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.12.0 + + + org.apache.maven.plugins + maven-resources-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-source-plugin + 3.4.0 + + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.6 + + + + **/vmlens/*.java + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.6.3 + + + org.codehaus.mojo + exec-maven-plugin + 3.6.3 + + + org.jacoco + jacoco-maven-plugin + 0.8.15 + + + org.pitest + pitest-maven + 1.25.5 + + + org.sonatype.central + central-publishing-maven-plugin + 0.11.0 + + + + + + org.apache.maven.plugins + 
maven-enforcer-plugin + + + enforce + + enforce + + + + + [3.6.3,) + + + [1.8,) + + + + + + commons-logging:commons-logging + + log4j:log4j + + org.hamcrest:hamcrest-core + org.hamcrest:hamcrest-library + org.hamcrest:hamcrest-all + + junit:junit + junit:junit-dep + + + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + + + get-git-properties + + revision + + initialize + + + + yyyy-MM-dd'T'HH:mm:ss'Z' + UTC + false + false + + + + org.apache.maven.plugins + maven-compiler-plugin + + 8 + 21 + true + true + + + -Xlint:all,-serial,-options,-classfile,-processing + -Werror + + + -processor + lombok.launch.AnnotationProcessorHider$AnnotationProcessor,lombok.launch.AnnotationProcessorHider$ClaimingProcessor,org.checkerframework.checker.nullness.NullnessChecker + -XDaddTypeAnnotationsToSymbol=true + -XDcompilePolicy=simple + --should-stop=ifError=FLOW + -Xplugin:ErrorProne -Xep:NullAway:ERROR -XepOpt:NullAway:OnlyNullMarked=true -XepOpt:NullAway:JSpecifyMode=true -XepOpt:NullAway:CheckOptionalEmptiness=true -XepOpt:NullAway:AcknowledgeRestrictiveAnnotations=true -XepOpt:NullAway:AcknowledgeAndroidRecent=true -XepOpt:NullAway:AssertsEnabled=true -Xep:BoxedPrimitiveEquality:ERROR -Xep:EqualsHashCode:ERROR -Xep:EqualsIncompatibleType:ERROR -Xep:IdentityBinaryExpression:ERROR -Xep:SelfAssignment:ERROR -Xep:SelfComparison:ERROR -Xep:SelfEquals:ERROR -Xep:DeadException:ERROR -Xep:FormatString:ERROR -Xep:InvalidPatternSyntax:ERROR -Xep:OptionalEquality:ERROR -Xep:ImpossibleNullComparison:ERROR + + + + org.projectlombok + lombok + ${lombok.version} + + + com.google.errorprone + error_prone_core + ${errorprone.version} + + + com.uber.nullaway + nullaway + ${nullaway.version} + + + org.checkerframework + checker + ${checker.version} + + + + + + default-compile + + + + module-info.java + + + + + module-info-compile + compile + + compile + + + + 9 + + module-info.java + + + + + + + default-testCompile + + + false + + -XDaddTypeAnnotationsToSymbol=true + 
-XDcompilePolicy=simple + --should-stop=ifError=FLOW + -Xplugin:ErrorProne -Xep:NullAway:OFF -Xep:GuardedBy:OFF + + + + org.openjdk.jcstress + jcstress-core + ${jcstress.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + + + + + maven-resources-plugin + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + 1.8 + true + true + all + + + + attach-javadocs + + jar + + + + + + org.jacoco + jacoco-maven-plugin + + + prepare-agent + + prepare-agent + + + + report + test + + report + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + @{argLine} -Xmx2g -XX:ErrorFile=hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=. + + true + + + + com.diffplug.spotless + spotless-maven-plugin + + + + src/main/java/**/*.java + src/test/java/**/*.java + + + ${palantir-java-format.version} + + + + + + + + + spotless-check + verify + + check + + + + + + com.github.spotbugs + spotbugs-maven-plugin + + Max + Low + true + false + spotbugs-exclude.xml + + + com.mebigfatguy.fb-contrib + fb-contrib + ${fb-contrib.version} + + + com.h3xstream.findsecbugs + findsecbugs-plugin + ${findsecbugs.version} + + + + + + spotbugs-check + verify + + check + + + + + + org.codehaus.mojo + exec-maven-plugin + + org.openjdk.jmh.Main + test + + + + + org.pitest + pitest-maven + + + org.pitest + pitest-junit5-plugin + 1.2.3 + + + + + net.ladenthin.llama.value.* + net.ladenthin.llama.exception.* + net.ladenthin.llama.args.* + net.ladenthin.llama.json.TimingsLogger + net.ladenthin.llama.json.RerankResponseParser + net.ladenthin.llama.json.ChatResponseParser + net.ladenthin.llama.json.CompletionResponseParser + + + net.ladenthin.llama.value.* + net.ladenthin.llama.exception.* + net.ladenthin.llama.args.* + net.ladenthin.llama.json.* + + 100 + 30000 + + + + + + + + + cuda + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + gpu + compile + + compile 
+ + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_cuda + + + + + + maven-resources-plugin + + + + copy-resources + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_cuda + + + + ${basedir}/src/main/resources_linux_cuda/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + cuda + package + + jar + + + cuda13-linux-x86-64 + + ${project.build.outputDirectory}_cuda + + + + + + + + + + opencl-android + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + opencl-android + compile + + compile + + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_opencl_android + + + + + + maven-resources-plugin + + + + copy-resources-opencl-android + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_opencl_android + + + + ${basedir}/src/main/resources_android_opencl/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + opencl-android + package + + jar + + + opencl-android-aarch64 + + ${project.build.outputDirectory}_opencl_android + + + + + + + + + + windows-msvc + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + windows-msvc + compile + + compile + + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_msvc + + + + + + maven-resources-plugin + + + + copy-resources-windows-msvc + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_msvc + + + + ${basedir}/src/main/resources_windows_msvc/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + windows-msvc + package + + jar + + + msvc-windows + + ${project.build.outputDirectory}_windows_msvc + + + + + + + + + + + cuda-windows + + + + org.apache.maven.plugins + maven-compiler-plugin + + + cuda-windows + compile + + compile + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_cuda + + + + + + 
maven-resources-plugin + + + copy-resources-cuda-windows + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_cuda + + + + ${basedir}/src/main/resources_windows_cuda/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + cuda-windows + package + + jar + + + cuda13-windows-x86-64 + + ${project.build.outputDirectory}_windows_cuda + + + + + + + + + + + vulkan-windows + + + + org.apache.maven.plugins + maven-compiler-plugin + + + vulkan-windows + compile + + compile + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_vulkan + + + + + + maven-resources-plugin + + + copy-resources-vulkan-windows + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_vulkan + + + + ${basedir}/src/main/resources_windows_vulkan/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + vulkan-windows + package + + jar + + + vulkan-windows-x86-64 + + ${project.build.outputDirectory}_windows_vulkan + + + + + + + + + + + opencl-windows + + + + org.apache.maven.plugins + maven-compiler-plugin + + + opencl-windows + compile + + compile + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_opencl + + + + + + maven-resources-plugin + + + copy-resources-opencl-windows + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_opencl + + + + ${basedir}/src/main/resources_windows_opencl/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + opencl-windows + package + + jar + + + opencl-windows-x86-64 + + ${project.build.outputDirectory}_windows_opencl + + + + + + + + + + vmlens + + + + com.vmlens + vmlens-maven-plugin + + + + **/vmlens/*.java + + + + + vmlens-test + + test + + + + + + + + + jcstress + + + + org.codehaus.mojo + exec-maven-plugin + + + jcstress + test + exec + + ${java.home}/bin/java + test + + -classpath + + org.openjdk.jcstress.Main + -v + 
-m + default + + + + + + + + + + + assembly + + + + org.apache.maven.plugins + maven-assembly-plugin + + + jar-with-dependencies + + + + net.ladenthin.llama.server.OpenAiCompatServer + + + + + + build-fat-jar + package + + single + + + + + + + + + diff --git a/spotbugs-exclude.xml b/llama/spotbugs-exclude.xml similarity index 100% rename from spotbugs-exclude.xml rename to llama/spotbugs-exclude.xml diff --git a/src/main/cpp/compat/ggml_x86_compat.c b/llama/src/main/cpp/compat/ggml_x86_compat.c similarity index 100% rename from src/main/cpp/compat/ggml_x86_compat.c rename to llama/src/main/cpp/compat/ggml_x86_compat.c diff --git a/src/main/cpp/jllama.cpp b/llama/src/main/cpp/jllama.cpp similarity index 99% rename from src/main/cpp/jllama.cpp rename to llama/src/main/cpp/jllama.cpp index c0eef843..f3871f89 100644 --- a/src/main/cpp/jllama.cpp +++ b/llama/src/main/cpp/jllama.cpp @@ -770,9 +770,8 @@ JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress // Build the special-token id map (a token is -1 / LLAMA_TOKEN_NULL when the model defines none). 
static json special_tokens_json(const llama_vocab *vocab) { return { - {"bos", llama_vocab_bos(vocab)}, {"eos", llama_vocab_eos(vocab)}, - {"eot", llama_vocab_eot(vocab)}, {"sep", llama_vocab_sep(vocab)}, - {"nl", llama_vocab_nl(vocab)}, {"pad", llama_vocab_pad(vocab)}, + {"bos", llama_vocab_bos(vocab)}, {"eos", llama_vocab_eos(vocab)}, {"eot", llama_vocab_eot(vocab)}, + {"sep", llama_vocab_sep(vocab)}, {"nl", llama_vocab_nl(vocab)}, {"pad", llama_vocab_pad(vocab)}, }; } diff --git a/src/main/cpp/jllama.h b/llama/src/main/cpp/jllama.h similarity index 100% rename from src/main/cpp/jllama.h rename to llama/src/main/cpp/jllama.h diff --git a/src/main/cpp/jni_helpers.hpp b/llama/src/main/cpp/jni_helpers.hpp similarity index 100% rename from src/main/cpp/jni_helpers.hpp rename to llama/src/main/cpp/jni_helpers.hpp diff --git a/src/main/cpp/json_helpers.hpp b/llama/src/main/cpp/json_helpers.hpp similarity index 100% rename from src/main/cpp/json_helpers.hpp rename to llama/src/main/cpp/json_helpers.hpp diff --git a/src/main/cpp/log_helpers.hpp b/llama/src/main/cpp/log_helpers.hpp similarity index 100% rename from src/main/cpp/log_helpers.hpp rename to llama/src/main/cpp/log_helpers.hpp diff --git a/src/main/cpp/tts_engine.cpp b/llama/src/main/cpp/tts_engine.cpp similarity index 100% rename from src/main/cpp/tts_engine.cpp rename to llama/src/main/cpp/tts_engine.cpp diff --git a/src/main/cpp/tts_engine.h b/llama/src/main/cpp/tts_engine.h similarity index 100% rename from src/main/cpp/tts_engine.h rename to llama/src/main/cpp/tts_engine.h diff --git a/src/main/cpp/tts_upstream.h b/llama/src/main/cpp/tts_upstream.h similarity index 100% rename from src/main/cpp/tts_upstream.h rename to llama/src/main/cpp/tts_upstream.h diff --git a/src/main/cpp/tts_wav.hpp b/llama/src/main/cpp/tts_wav.hpp similarity index 100% rename from src/main/cpp/tts_wav.hpp rename to llama/src/main/cpp/tts_wav.hpp diff --git a/src/main/cpp/utils.hpp b/llama/src/main/cpp/utils.hpp similarity index 
100% rename from src/main/cpp/utils.hpp rename to llama/src/main/cpp/utils.hpp diff --git a/src/main/cpp/webui_stub/ui.h b/llama/src/main/cpp/webui_stub/ui.h similarity index 100% rename from src/main/cpp/webui_stub/ui.h rename to llama/src/main/cpp/webui_stub/ui.h diff --git a/src/main/java/module-info.java b/llama/src/main/java/module-info.java similarity index 100% rename from src/main/java/module-info.java rename to llama/src/main/java/module-info.java diff --git a/src/main/java/net/ladenthin/llama/LlamaIterable.java b/llama/src/main/java/net/ladenthin/llama/LlamaIterable.java similarity index 100% rename from src/main/java/net/ladenthin/llama/LlamaIterable.java rename to llama/src/main/java/net/ladenthin/llama/LlamaIterable.java diff --git a/src/main/java/net/ladenthin/llama/LlamaIterator.java b/llama/src/main/java/net/ladenthin/llama/LlamaIterator.java similarity index 100% rename from src/main/java/net/ladenthin/llama/LlamaIterator.java rename to llama/src/main/java/net/ladenthin/llama/LlamaIterator.java diff --git a/src/main/java/net/ladenthin/llama/LlamaModel.java b/llama/src/main/java/net/ladenthin/llama/LlamaModel.java similarity index 100% rename from src/main/java/net/ladenthin/llama/LlamaModel.java rename to llama/src/main/java/net/ladenthin/llama/LlamaModel.java diff --git a/src/main/java/net/ladenthin/llama/Session.java b/llama/src/main/java/net/ladenthin/llama/Session.java similarity index 100% rename from src/main/java/net/ladenthin/llama/Session.java rename to llama/src/main/java/net/ladenthin/llama/Session.java diff --git a/src/main/java/net/ladenthin/llama/SessionState.java b/llama/src/main/java/net/ladenthin/llama/SessionState.java similarity index 100% rename from src/main/java/net/ladenthin/llama/SessionState.java rename to llama/src/main/java/net/ladenthin/llama/SessionState.java diff --git a/src/main/java/net/ladenthin/llama/TextToSpeech.java b/llama/src/main/java/net/ladenthin/llama/TextToSpeech.java similarity index 100% rename from 
src/main/java/net/ladenthin/llama/TextToSpeech.java rename to llama/src/main/java/net/ladenthin/llama/TextToSpeech.java diff --git a/src/main/java/net/ladenthin/llama/ToolCallingAgent.java b/llama/src/main/java/net/ladenthin/llama/ToolCallingAgent.java similarity index 100% rename from src/main/java/net/ladenthin/llama/ToolCallingAgent.java rename to llama/src/main/java/net/ladenthin/llama/ToolCallingAgent.java diff --git a/src/main/java/net/ladenthin/llama/args/CacheType.java b/llama/src/main/java/net/ladenthin/llama/args/CacheType.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/CacheType.java rename to llama/src/main/java/net/ladenthin/llama/args/CacheType.java diff --git a/src/main/java/net/ladenthin/llama/args/CliArg.java b/llama/src/main/java/net/ladenthin/llama/args/CliArg.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/CliArg.java rename to llama/src/main/java/net/ladenthin/llama/args/CliArg.java diff --git a/src/main/java/net/ladenthin/llama/args/ContinuationMode.java b/llama/src/main/java/net/ladenthin/llama/args/ContinuationMode.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/ContinuationMode.java rename to llama/src/main/java/net/ladenthin/llama/args/ContinuationMode.java diff --git a/src/main/java/net/ladenthin/llama/args/GpuSplitMode.java b/llama/src/main/java/net/ladenthin/llama/args/GpuSplitMode.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/GpuSplitMode.java rename to llama/src/main/java/net/ladenthin/llama/args/GpuSplitMode.java diff --git a/src/main/java/net/ladenthin/llama/args/LogFormat.java b/llama/src/main/java/net/ladenthin/llama/args/LogFormat.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/LogFormat.java rename to llama/src/main/java/net/ladenthin/llama/args/LogFormat.java diff --git a/src/main/java/net/ladenthin/llama/args/MiroStat.java 
b/llama/src/main/java/net/ladenthin/llama/args/MiroStat.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/MiroStat.java rename to llama/src/main/java/net/ladenthin/llama/args/MiroStat.java diff --git a/src/main/java/net/ladenthin/llama/args/ModelFlag.java b/llama/src/main/java/net/ladenthin/llama/args/ModelFlag.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/ModelFlag.java rename to llama/src/main/java/net/ladenthin/llama/args/ModelFlag.java diff --git a/src/main/java/net/ladenthin/llama/args/NumaStrategy.java b/llama/src/main/java/net/ladenthin/llama/args/NumaStrategy.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/NumaStrategy.java rename to llama/src/main/java/net/ladenthin/llama/args/NumaStrategy.java diff --git a/src/main/java/net/ladenthin/llama/args/PoolingType.java b/llama/src/main/java/net/ladenthin/llama/args/PoolingType.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/PoolingType.java rename to llama/src/main/java/net/ladenthin/llama/args/PoolingType.java diff --git a/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java b/llama/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/ReasoningFormat.java rename to llama/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java diff --git a/src/main/java/net/ladenthin/llama/args/RopeScalingType.java b/llama/src/main/java/net/ladenthin/llama/args/RopeScalingType.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/RopeScalingType.java rename to llama/src/main/java/net/ladenthin/llama/args/RopeScalingType.java diff --git a/src/main/java/net/ladenthin/llama/args/Sampler.java b/llama/src/main/java/net/ladenthin/llama/args/Sampler.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/Sampler.java rename to llama/src/main/java/net/ladenthin/llama/args/Sampler.java 
diff --git a/src/main/java/net/ladenthin/llama/args/package-info.java b/llama/src/main/java/net/ladenthin/llama/args/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/args/package-info.java rename to llama/src/main/java/net/ladenthin/llama/args/package-info.java diff --git a/src/main/java/net/ladenthin/llama/callback/CancellationToken.java b/llama/src/main/java/net/ladenthin/llama/callback/CancellationToken.java similarity index 100% rename from src/main/java/net/ladenthin/llama/callback/CancellationToken.java rename to llama/src/main/java/net/ladenthin/llama/callback/CancellationToken.java diff --git a/src/main/java/net/ladenthin/llama/callback/LoadProgressCallback.java b/llama/src/main/java/net/ladenthin/llama/callback/LoadProgressCallback.java similarity index 100% rename from src/main/java/net/ladenthin/llama/callback/LoadProgressCallback.java rename to llama/src/main/java/net/ladenthin/llama/callback/LoadProgressCallback.java diff --git a/src/main/java/net/ladenthin/llama/callback/ToolHandler.java b/llama/src/main/java/net/ladenthin/llama/callback/ToolHandler.java similarity index 100% rename from src/main/java/net/ladenthin/llama/callback/ToolHandler.java rename to llama/src/main/java/net/ladenthin/llama/callback/ToolHandler.java diff --git a/src/main/java/net/ladenthin/llama/callback/package-info.java b/llama/src/main/java/net/ladenthin/llama/callback/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/callback/package-info.java rename to llama/src/main/java/net/ladenthin/llama/callback/package-info.java diff --git a/src/main/java/net/ladenthin/llama/exception/LlamaException.java b/llama/src/main/java/net/ladenthin/llama/exception/LlamaException.java similarity index 100% rename from src/main/java/net/ladenthin/llama/exception/LlamaException.java rename to llama/src/main/java/net/ladenthin/llama/exception/LlamaException.java diff --git 
a/src/main/java/net/ladenthin/llama/exception/ModelUnavailableException.java b/llama/src/main/java/net/ladenthin/llama/exception/ModelUnavailableException.java similarity index 100% rename from src/main/java/net/ladenthin/llama/exception/ModelUnavailableException.java rename to llama/src/main/java/net/ladenthin/llama/exception/ModelUnavailableException.java diff --git a/src/main/java/net/ladenthin/llama/exception/package-info.java b/llama/src/main/java/net/ladenthin/llama/exception/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/exception/package-info.java rename to llama/src/main/java/net/ladenthin/llama/exception/package-info.java diff --git a/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java b/llama/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java similarity index 100% rename from src/main/java/net/ladenthin/llama/json/ChatResponseParser.java rename to llama/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java diff --git a/src/main/java/net/ladenthin/llama/json/ChatStreamChunkParser.java b/llama/src/main/java/net/ladenthin/llama/json/ChatStreamChunkParser.java similarity index 100% rename from src/main/java/net/ladenthin/llama/json/ChatStreamChunkParser.java rename to llama/src/main/java/net/ladenthin/llama/json/ChatStreamChunkParser.java diff --git a/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java b/llama/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java similarity index 100% rename from src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java rename to llama/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java diff --git a/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java b/llama/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java similarity index 100% rename from src/main/java/net/ladenthin/llama/json/RerankResponseParser.java rename to 
llama/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java diff --git a/src/main/java/net/ladenthin/llama/json/TimingsLogger.java b/llama/src/main/java/net/ladenthin/llama/json/TimingsLogger.java similarity index 100% rename from src/main/java/net/ladenthin/llama/json/TimingsLogger.java rename to llama/src/main/java/net/ladenthin/llama/json/TimingsLogger.java diff --git a/src/main/java/net/ladenthin/llama/json/package-info.java b/llama/src/main/java/net/ladenthin/llama/json/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/json/package-info.java rename to llama/src/main/java/net/ladenthin/llama/json/package-info.java diff --git a/src/main/java/net/ladenthin/llama/loader/Java8CompatibilityHelper.java b/llama/src/main/java/net/ladenthin/llama/loader/Java8CompatibilityHelper.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/Java8CompatibilityHelper.java rename to llama/src/main/java/net/ladenthin/llama/loader/Java8CompatibilityHelper.java diff --git a/src/main/java/net/ladenthin/llama/loader/LlamaLoader.java b/llama/src/main/java/net/ladenthin/llama/loader/LlamaLoader.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/LlamaLoader.java rename to llama/src/main/java/net/ladenthin/llama/loader/LlamaLoader.java diff --git a/src/main/java/net/ladenthin/llama/loader/LlamaSystemProperties.java b/llama/src/main/java/net/ladenthin/llama/loader/LlamaSystemProperties.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/LlamaSystemProperties.java rename to llama/src/main/java/net/ladenthin/llama/loader/LlamaSystemProperties.java diff --git a/src/main/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetter.java b/llama/src/main/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetter.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetter.java rename to 
llama/src/main/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetter.java diff --git a/src/main/java/net/ladenthin/llama/loader/OSInfo.java b/llama/src/main/java/net/ladenthin/llama/loader/OSInfo.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/OSInfo.java rename to llama/src/main/java/net/ladenthin/llama/loader/OSInfo.java diff --git a/src/main/java/net/ladenthin/llama/loader/OfflineModelGuard.java b/llama/src/main/java/net/ladenthin/llama/loader/OfflineModelGuard.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/OfflineModelGuard.java rename to llama/src/main/java/net/ladenthin/llama/loader/OfflineModelGuard.java diff --git a/src/main/java/net/ladenthin/llama/loader/ProcessRunner.java b/llama/src/main/java/net/ladenthin/llama/loader/ProcessRunner.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/ProcessRunner.java rename to llama/src/main/java/net/ladenthin/llama/loader/ProcessRunner.java diff --git a/src/main/java/net/ladenthin/llama/loader/package-info.java b/llama/src/main/java/net/ladenthin/llama/loader/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/loader/package-info.java rename to llama/src/main/java/net/ladenthin/llama/loader/package-info.java diff --git a/src/main/java/net/ladenthin/llama/package-info.java b/llama/src/main/java/net/ladenthin/llama/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/package-info.java rename to llama/src/main/java/net/ladenthin/llama/package-info.java diff --git a/src/main/java/net/ladenthin/llama/parameters/ChatRequest.java b/llama/src/main/java/net/ladenthin/llama/parameters/ChatRequest.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/ChatRequest.java rename to llama/src/main/java/net/ladenthin/llama/parameters/ChatRequest.java diff --git a/src/main/java/net/ladenthin/llama/parameters/CliParameters.java 
b/llama/src/main/java/net/ladenthin/llama/parameters/CliParameters.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/CliParameters.java rename to llama/src/main/java/net/ladenthin/llama/parameters/CliParameters.java diff --git a/src/main/java/net/ladenthin/llama/parameters/InferenceParameters.java b/llama/src/main/java/net/ladenthin/llama/parameters/InferenceParameters.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/InferenceParameters.java rename to llama/src/main/java/net/ladenthin/llama/parameters/InferenceParameters.java diff --git a/src/main/java/net/ladenthin/llama/parameters/JsonParameters.java b/llama/src/main/java/net/ladenthin/llama/parameters/JsonParameters.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/JsonParameters.java rename to llama/src/main/java/net/ladenthin/llama/parameters/JsonParameters.java diff --git a/src/main/java/net/ladenthin/llama/parameters/ModelParameters.java b/llama/src/main/java/net/ladenthin/llama/parameters/ModelParameters.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/ModelParameters.java rename to llama/src/main/java/net/ladenthin/llama/parameters/ModelParameters.java diff --git a/src/main/java/net/ladenthin/llama/parameters/ParameterJsonSerializer.java b/llama/src/main/java/net/ladenthin/llama/parameters/ParameterJsonSerializer.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/ParameterJsonSerializer.java rename to llama/src/main/java/net/ladenthin/llama/parameters/ParameterJsonSerializer.java diff --git a/src/main/java/net/ladenthin/llama/parameters/package-info.java b/llama/src/main/java/net/ladenthin/llama/parameters/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/parameters/package-info.java rename to llama/src/main/java/net/ladenthin/llama/parameters/package-info.java diff --git 
a/src/main/java/net/ladenthin/llama/server/AnthropicApiSupport.java b/llama/src/main/java/net/ladenthin/llama/server/AnthropicApiSupport.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/AnthropicApiSupport.java rename to llama/src/main/java/net/ladenthin/llama/server/AnthropicApiSupport.java diff --git a/src/main/java/net/ladenthin/llama/server/AnthropicStreamTranslator.java b/llama/src/main/java/net/ladenthin/llama/server/AnthropicStreamTranslator.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/AnthropicStreamTranslator.java rename to llama/src/main/java/net/ladenthin/llama/server/AnthropicStreamTranslator.java diff --git a/src/main/java/net/ladenthin/llama/server/ChunkSink.java b/llama/src/main/java/net/ladenthin/llama/server/ChunkSink.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/ChunkSink.java rename to llama/src/main/java/net/ladenthin/llama/server/ChunkSink.java diff --git a/src/main/java/net/ladenthin/llama/server/LlamaModelBackend.java b/llama/src/main/java/net/ladenthin/llama/server/LlamaModelBackend.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/LlamaModelBackend.java rename to llama/src/main/java/net/ladenthin/llama/server/LlamaModelBackend.java diff --git a/src/main/java/net/ladenthin/llama/server/NativeServer.java b/llama/src/main/java/net/ladenthin/llama/server/NativeServer.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/NativeServer.java rename to llama/src/main/java/net/ladenthin/llama/server/NativeServer.java diff --git a/src/main/java/net/ladenthin/llama/server/OaiRerankSupport.java b/llama/src/main/java/net/ladenthin/llama/server/OaiRerankSupport.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OaiRerankSupport.java rename to llama/src/main/java/net/ladenthin/llama/server/OaiRerankSupport.java diff --git 
a/src/main/java/net/ladenthin/llama/server/OllamaApiSupport.java b/llama/src/main/java/net/ladenthin/llama/server/OllamaApiSupport.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OllamaApiSupport.java rename to llama/src/main/java/net/ladenthin/llama/server/OllamaApiSupport.java diff --git a/src/main/java/net/ladenthin/llama/server/OpenAiBackend.java b/llama/src/main/java/net/ladenthin/llama/server/OpenAiBackend.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OpenAiBackend.java rename to llama/src/main/java/net/ladenthin/llama/server/OpenAiBackend.java diff --git a/src/main/java/net/ladenthin/llama/server/OpenAiCompatServer.java b/llama/src/main/java/net/ladenthin/llama/server/OpenAiCompatServer.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OpenAiCompatServer.java rename to llama/src/main/java/net/ladenthin/llama/server/OpenAiCompatServer.java diff --git a/src/main/java/net/ladenthin/llama/server/OpenAiRequestMapper.java b/llama/src/main/java/net/ladenthin/llama/server/OpenAiRequestMapper.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OpenAiRequestMapper.java rename to llama/src/main/java/net/ladenthin/llama/server/OpenAiRequestMapper.java diff --git a/src/main/java/net/ladenthin/llama/server/OpenAiServerCli.java b/llama/src/main/java/net/ladenthin/llama/server/OpenAiServerCli.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OpenAiServerCli.java rename to llama/src/main/java/net/ladenthin/llama/server/OpenAiServerCli.java diff --git a/src/main/java/net/ladenthin/llama/server/OpenAiServerConfig.java b/llama/src/main/java/net/ladenthin/llama/server/OpenAiServerConfig.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OpenAiServerConfig.java rename to llama/src/main/java/net/ladenthin/llama/server/OpenAiServerConfig.java diff --git 
a/src/main/java/net/ladenthin/llama/server/OpenAiSseFormatter.java b/llama/src/main/java/net/ladenthin/llama/server/OpenAiSseFormatter.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/OpenAiSseFormatter.java rename to llama/src/main/java/net/ladenthin/llama/server/OpenAiSseFormatter.java diff --git a/src/main/java/net/ladenthin/llama/server/ResponsesApiSupport.java b/llama/src/main/java/net/ladenthin/llama/server/ResponsesApiSupport.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/ResponsesApiSupport.java rename to llama/src/main/java/net/ladenthin/llama/server/ResponsesApiSupport.java diff --git a/src/main/java/net/ladenthin/llama/server/ResponsesStreamTranslator.java b/llama/src/main/java/net/ladenthin/llama/server/ResponsesStreamTranslator.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/ResponsesStreamTranslator.java rename to llama/src/main/java/net/ladenthin/llama/server/ResponsesStreamTranslator.java diff --git a/src/main/java/net/ladenthin/llama/server/ToolCallDeltaAccumulator.java b/llama/src/main/java/net/ladenthin/llama/server/ToolCallDeltaAccumulator.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/ToolCallDeltaAccumulator.java rename to llama/src/main/java/net/ladenthin/llama/server/ToolCallDeltaAccumulator.java diff --git a/src/main/java/net/ladenthin/llama/server/package-info.java b/llama/src/main/java/net/ladenthin/llama/server/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/server/package-info.java rename to llama/src/main/java/net/ladenthin/llama/server/package-info.java diff --git a/src/main/java/net/ladenthin/llama/value/ChatChoice.java b/llama/src/main/java/net/ladenthin/llama/value/ChatChoice.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ChatChoice.java rename to llama/src/main/java/net/ladenthin/llama/value/ChatChoice.java diff --git 
a/src/main/java/net/ladenthin/llama/value/ChatMessage.java b/llama/src/main/java/net/ladenthin/llama/value/ChatMessage.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ChatMessage.java rename to llama/src/main/java/net/ladenthin/llama/value/ChatMessage.java diff --git a/src/main/java/net/ladenthin/llama/value/ChatResponse.java b/llama/src/main/java/net/ladenthin/llama/value/ChatResponse.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ChatResponse.java rename to llama/src/main/java/net/ladenthin/llama/value/ChatResponse.java diff --git a/src/main/java/net/ladenthin/llama/value/ChatTranscript.java b/llama/src/main/java/net/ladenthin/llama/value/ChatTranscript.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ChatTranscript.java rename to llama/src/main/java/net/ladenthin/llama/value/ChatTranscript.java diff --git a/src/main/java/net/ladenthin/llama/value/CompletionResult.java b/llama/src/main/java/net/ladenthin/llama/value/CompletionResult.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/CompletionResult.java rename to llama/src/main/java/net/ladenthin/llama/value/CompletionResult.java diff --git a/src/main/java/net/ladenthin/llama/value/ContentPart.java b/llama/src/main/java/net/ladenthin/llama/value/ContentPart.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ContentPart.java rename to llama/src/main/java/net/ladenthin/llama/value/ContentPart.java diff --git a/src/main/java/net/ladenthin/llama/value/LlamaOutput.java b/llama/src/main/java/net/ladenthin/llama/value/LlamaOutput.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/LlamaOutput.java rename to llama/src/main/java/net/ladenthin/llama/value/LlamaOutput.java diff --git a/src/main/java/net/ladenthin/llama/value/LogLevel.java b/llama/src/main/java/net/ladenthin/llama/value/LogLevel.java similarity index 100% rename from 
src/main/java/net/ladenthin/llama/value/LogLevel.java rename to llama/src/main/java/net/ladenthin/llama/value/LogLevel.java diff --git a/src/main/java/net/ladenthin/llama/value/ModelMeta.java b/llama/src/main/java/net/ladenthin/llama/value/ModelMeta.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ModelMeta.java rename to llama/src/main/java/net/ladenthin/llama/value/ModelMeta.java diff --git a/src/main/java/net/ladenthin/llama/value/Pair.java b/llama/src/main/java/net/ladenthin/llama/value/Pair.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/Pair.java rename to llama/src/main/java/net/ladenthin/llama/value/Pair.java diff --git a/src/main/java/net/ladenthin/llama/value/ServerMetrics.java b/llama/src/main/java/net/ladenthin/llama/value/ServerMetrics.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ServerMetrics.java rename to llama/src/main/java/net/ladenthin/llama/value/ServerMetrics.java diff --git a/src/main/java/net/ladenthin/llama/value/SlotMetrics.java b/llama/src/main/java/net/ladenthin/llama/value/SlotMetrics.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/SlotMetrics.java rename to llama/src/main/java/net/ladenthin/llama/value/SlotMetrics.java diff --git a/src/main/java/net/ladenthin/llama/value/StopReason.java b/llama/src/main/java/net/ladenthin/llama/value/StopReason.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/StopReason.java rename to llama/src/main/java/net/ladenthin/llama/value/StopReason.java diff --git a/src/main/java/net/ladenthin/llama/value/Timings.java b/llama/src/main/java/net/ladenthin/llama/value/Timings.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/Timings.java rename to llama/src/main/java/net/ladenthin/llama/value/Timings.java diff --git a/src/main/java/net/ladenthin/llama/value/TokenLogprob.java 
b/llama/src/main/java/net/ladenthin/llama/value/TokenLogprob.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/TokenLogprob.java rename to llama/src/main/java/net/ladenthin/llama/value/TokenLogprob.java diff --git a/src/main/java/net/ladenthin/llama/value/ToolCall.java b/llama/src/main/java/net/ladenthin/llama/value/ToolCall.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ToolCall.java rename to llama/src/main/java/net/ladenthin/llama/value/ToolCall.java diff --git a/src/main/java/net/ladenthin/llama/value/ToolDefinition.java b/llama/src/main/java/net/ladenthin/llama/value/ToolDefinition.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/ToolDefinition.java rename to llama/src/main/java/net/ladenthin/llama/value/ToolDefinition.java diff --git a/src/main/java/net/ladenthin/llama/value/Usage.java b/llama/src/main/java/net/ladenthin/llama/value/Usage.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/Usage.java rename to llama/src/main/java/net/ladenthin/llama/value/Usage.java diff --git a/src/main/java/net/ladenthin/llama/value/package-info.java b/llama/src/main/java/net/ladenthin/llama/value/package-info.java similarity index 100% rename from src/main/java/net/ladenthin/llama/value/package-info.java rename to llama/src/main/java/net/ladenthin/llama/value/package-info.java diff --git a/src/test/cpp/test_jni_helpers.cpp b/llama/src/test/cpp/test_jni_helpers.cpp similarity index 100% rename from src/test/cpp/test_jni_helpers.cpp rename to llama/src/test/cpp/test_jni_helpers.cpp diff --git a/src/test/cpp/test_json_helpers.cpp b/llama/src/test/cpp/test_json_helpers.cpp similarity index 100% rename from src/test/cpp/test_json_helpers.cpp rename to llama/src/test/cpp/test_json_helpers.cpp diff --git a/src/test/cpp/test_log_helpers.cpp b/llama/src/test/cpp/test_log_helpers.cpp similarity index 100% rename from src/test/cpp/test_log_helpers.cpp rename to 
llama/src/test/cpp/test_log_helpers.cpp diff --git a/src/test/cpp/test_server.cpp b/llama/src/test/cpp/test_server.cpp similarity index 100% rename from src/test/cpp/test_server.cpp rename to llama/src/test/cpp/test_server.cpp diff --git a/src/test/cpp/test_tts_wav.cpp b/llama/src/test/cpp/test_tts_wav.cpp similarity index 100% rename from src/test/cpp/test_tts_wav.cpp rename to llama/src/test/cpp/test_tts_wav.cpp diff --git a/src/test/cpp/test_utils.cpp b/llama/src/test/cpp/test_utils.cpp similarity index 100% rename from src/test/cpp/test_utils.cpp rename to llama/src/test/cpp/test_utils.cpp diff --git a/src/test/java/examples/ChatExample.java b/llama/src/test/java/examples/ChatExample.java similarity index 100% rename from src/test/java/examples/ChatExample.java rename to llama/src/test/java/examples/ChatExample.java diff --git a/src/test/java/examples/GrammarExample.java b/llama/src/test/java/examples/GrammarExample.java similarity index 100% rename from src/test/java/examples/GrammarExample.java rename to llama/src/test/java/examples/GrammarExample.java diff --git a/src/test/java/examples/InfillExample.java b/llama/src/test/java/examples/InfillExample.java similarity index 100% rename from src/test/java/examples/InfillExample.java rename to llama/src/test/java/examples/InfillExample.java diff --git a/src/test/java/examples/MainExample.java b/llama/src/test/java/examples/MainExample.java similarity index 100% rename from src/test/java/examples/MainExample.java rename to llama/src/test/java/examples/MainExample.java diff --git a/src/test/java/examples/OpenAiServerExample.java b/llama/src/test/java/examples/OpenAiServerExample.java similarity index 100% rename from src/test/java/examples/OpenAiServerExample.java rename to llama/src/test/java/examples/OpenAiServerExample.java diff --git a/src/test/java/net/ladenthin/llama/AudioInputIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/AudioInputIntegrationTest.java similarity index 100% rename from 
src/test/java/net/ladenthin/llama/AudioInputIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/AudioInputIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java b/llama/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ChatAdvancedTest.java rename to llama/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java diff --git a/src/test/java/net/ladenthin/llama/ChatScenarioTest.java b/llama/src/test/java/net/ladenthin/llama/ChatScenarioTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ChatScenarioTest.java rename to llama/src/test/java/net/ladenthin/llama/ChatScenarioTest.java diff --git a/src/test/java/net/ladenthin/llama/ClaudeGenerated.java b/llama/src/test/java/net/ladenthin/llama/ClaudeGenerated.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ClaudeGenerated.java rename to llama/src/test/java/net/ladenthin/llama/ClaudeGenerated.java diff --git a/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java b/llama/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java rename to llama/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java diff --git a/src/test/java/net/ladenthin/llama/ErrorHandlingTest.java b/llama/src/test/java/net/ladenthin/llama/ErrorHandlingTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ErrorHandlingTest.java rename to llama/src/test/java/net/ladenthin/llama/ErrorHandlingTest.java diff --git a/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java b/llama/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java rename to llama/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java diff 
--git a/src/test/java/net/ladenthin/llama/LlamaEmbeddingsTest.java b/llama/src/test/java/net/ladenthin/llama/LlamaEmbeddingsTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/LlamaEmbeddingsTest.java rename to llama/src/test/java/net/ladenthin/llama/LlamaEmbeddingsTest.java diff --git a/src/test/java/net/ladenthin/llama/LlamaModelOfflineTest.java b/llama/src/test/java/net/ladenthin/llama/LlamaModelOfflineTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/LlamaModelOfflineTest.java rename to llama/src/test/java/net/ladenthin/llama/LlamaModelOfflineTest.java diff --git a/src/test/java/net/ladenthin/llama/LlamaModelTest.java b/llama/src/test/java/net/ladenthin/llama/LlamaModelTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/LlamaModelTest.java rename to llama/src/test/java/net/ladenthin/llama/LlamaModelTest.java diff --git a/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java b/llama/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java similarity index 100% rename from src/test/java/net/ladenthin/llama/LlamaParameterProperties.java rename to llama/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java diff --git a/src/test/java/net/ladenthin/llama/MemoryManagementTest.java b/llama/src/test/java/net/ladenthin/llama/MemoryManagementTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/MemoryManagementTest.java rename to llama/src/test/java/net/ladenthin/llama/MemoryManagementTest.java diff --git a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java 
b/llama/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java rename to llama/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java diff --git a/src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java b/llama/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java rename to llama/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java diff --git a/src/test/java/net/ladenthin/llama/RerankingModelTest.java b/llama/src/test/java/net/ladenthin/llama/RerankingModelTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/RerankingModelTest.java rename to llama/src/test/java/net/ladenthin/llama/RerankingModelTest.java diff --git a/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java b/llama/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java rename to llama/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java diff --git a/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java b/llama/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java rename to llama/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java diff --git a/src/test/java/net/ladenthin/llama/SessionStateTest.java b/llama/src/test/java/net/ladenthin/llama/SessionStateTest.java similarity index 100% rename from 
src/test/java/net/ladenthin/llama/SessionStateTest.java rename to llama/src/test/java/net/ladenthin/llama/SessionStateTest.java diff --git a/src/test/java/net/ladenthin/llama/SessionTest.java b/llama/src/test/java/net/ladenthin/llama/SessionTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/SessionTest.java rename to llama/src/test/java/net/ladenthin/llama/SessionTest.java diff --git a/src/test/java/net/ladenthin/llama/TestConstants.java b/llama/src/test/java/net/ladenthin/llama/TestConstants.java similarity index 100% rename from src/test/java/net/ladenthin/llama/TestConstants.java rename to llama/src/test/java/net/ladenthin/llama/TestConstants.java diff --git a/src/test/java/net/ladenthin/llama/ToolCallingAgentTest.java b/llama/src/test/java/net/ladenthin/llama/ToolCallingAgentTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ToolCallingAgentTest.java rename to llama/src/test/java/net/ladenthin/llama/ToolCallingAgentTest.java diff --git a/src/test/java/net/ladenthin/llama/ToolCallingIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/ToolCallingIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/ToolCallingIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/ToolCallingIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/TtsIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/TtsIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/TtsIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/TtsIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/args/AbstractCliArgEnumTest.java b/llama/src/test/java/net/ladenthin/llama/args/AbstractCliArgEnumTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/AbstractCliArgEnumTest.java rename to llama/src/test/java/net/ladenthin/llama/args/AbstractCliArgEnumTest.java diff --git 
a/src/test/java/net/ladenthin/llama/args/CacheTypeTest.java b/llama/src/test/java/net/ladenthin/llama/args/CacheTypeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/CacheTypeTest.java rename to llama/src/test/java/net/ladenthin/llama/args/CacheTypeTest.java diff --git a/src/test/java/net/ladenthin/llama/args/ContinuationModeTest.java b/llama/src/test/java/net/ladenthin/llama/args/ContinuationModeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/ContinuationModeTest.java rename to llama/src/test/java/net/ladenthin/llama/args/ContinuationModeTest.java diff --git a/src/test/java/net/ladenthin/llama/args/GpuSplitModeTest.java b/llama/src/test/java/net/ladenthin/llama/args/GpuSplitModeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/GpuSplitModeTest.java rename to llama/src/test/java/net/ladenthin/llama/args/GpuSplitModeTest.java diff --git a/src/test/java/net/ladenthin/llama/args/LogFormatTest.java b/llama/src/test/java/net/ladenthin/llama/args/LogFormatTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/LogFormatTest.java rename to llama/src/test/java/net/ladenthin/llama/args/LogFormatTest.java diff --git a/src/test/java/net/ladenthin/llama/args/MiroStatTest.java b/llama/src/test/java/net/ladenthin/llama/args/MiroStatTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/MiroStatTest.java rename to llama/src/test/java/net/ladenthin/llama/args/MiroStatTest.java diff --git a/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java b/llama/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/ModelFlagTest.java rename to llama/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java diff --git a/src/test/java/net/ladenthin/llama/args/NumaStrategyTest.java b/llama/src/test/java/net/ladenthin/llama/args/NumaStrategyTest.java similarity 
index 100% rename from src/test/java/net/ladenthin/llama/args/NumaStrategyTest.java rename to llama/src/test/java/net/ladenthin/llama/args/NumaStrategyTest.java diff --git a/src/test/java/net/ladenthin/llama/args/PoolingTypeTest.java b/llama/src/test/java/net/ladenthin/llama/args/PoolingTypeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/PoolingTypeTest.java rename to llama/src/test/java/net/ladenthin/llama/args/PoolingTypeTest.java diff --git a/src/test/java/net/ladenthin/llama/args/ReasoningFormatTest.java b/llama/src/test/java/net/ladenthin/llama/args/ReasoningFormatTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/ReasoningFormatTest.java rename to llama/src/test/java/net/ladenthin/llama/args/ReasoningFormatTest.java diff --git a/src/test/java/net/ladenthin/llama/args/RopeScalingTypeTest.java b/llama/src/test/java/net/ladenthin/llama/args/RopeScalingTypeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/RopeScalingTypeTest.java rename to llama/src/test/java/net/ladenthin/llama/args/RopeScalingTypeTest.java diff --git a/src/test/java/net/ladenthin/llama/args/SamplerTest.java b/llama/src/test/java/net/ladenthin/llama/args/SamplerTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/args/SamplerTest.java rename to llama/src/test/java/net/ladenthin/llama/args/SamplerTest.java diff --git a/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java b/llama/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java similarity index 100% rename from src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java rename to llama/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java diff --git a/src/test/java/net/ladenthin/llama/callback/CancellationTokenLincheckTest.java b/llama/src/test/java/net/ladenthin/llama/callback/CancellationTokenLincheckTest.java similarity index 
100% rename from src/test/java/net/ladenthin/llama/callback/CancellationTokenLincheckTest.java rename to llama/src/test/java/net/ladenthin/llama/callback/CancellationTokenLincheckTest.java diff --git a/src/test/java/net/ladenthin/llama/callback/CancellationTokenTest.java b/llama/src/test/java/net/ladenthin/llama/callback/CancellationTokenTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/callback/CancellationTokenTest.java rename to llama/src/test/java/net/ladenthin/llama/callback/CancellationTokenTest.java diff --git a/src/test/java/net/ladenthin/llama/callback/LoadProgressCallbackTest.java b/llama/src/test/java/net/ladenthin/llama/callback/LoadProgressCallbackTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/callback/LoadProgressCallbackTest.java rename to llama/src/test/java/net/ladenthin/llama/callback/LoadProgressCallbackTest.java diff --git a/src/test/java/net/ladenthin/llama/exception/LlamaExceptionTest.java b/llama/src/test/java/net/ladenthin/llama/exception/LlamaExceptionTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/exception/LlamaExceptionTest.java rename to llama/src/test/java/net/ladenthin/llama/exception/LlamaExceptionTest.java diff --git a/src/test/java/net/ladenthin/llama/exception/ModelUnavailableExceptionTest.java b/llama/src/test/java/net/ladenthin/llama/exception/ModelUnavailableExceptionTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/exception/ModelUnavailableExceptionTest.java rename to llama/src/test/java/net/ladenthin/llama/exception/ModelUnavailableExceptionTest.java diff --git a/src/test/java/net/ladenthin/llama/jcstress/CancellationTokenRace.java b/llama/src/test/java/net/ladenthin/llama/jcstress/CancellationTokenRace.java similarity index 100% rename from src/test/java/net/ladenthin/llama/jcstress/CancellationTokenRace.java rename to llama/src/test/java/net/ladenthin/llama/jcstress/CancellationTokenRace.java diff --git 
a/src/test/java/net/ladenthin/llama/json/ChatResponseParserTest.java b/llama/src/test/java/net/ladenthin/llama/json/ChatResponseParserTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/json/ChatResponseParserTest.java rename to llama/src/test/java/net/ladenthin/llama/json/ChatResponseParserTest.java diff --git a/src/test/java/net/ladenthin/llama/json/ChatStreamChunkParserTest.java b/llama/src/test/java/net/ladenthin/llama/json/ChatStreamChunkParserTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/json/ChatStreamChunkParserTest.java rename to llama/src/test/java/net/ladenthin/llama/json/ChatStreamChunkParserTest.java diff --git a/src/test/java/net/ladenthin/llama/json/CompletionResponseParserTest.java b/llama/src/test/java/net/ladenthin/llama/json/CompletionResponseParserTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/json/CompletionResponseParserTest.java rename to llama/src/test/java/net/ladenthin/llama/json/CompletionResponseParserTest.java diff --git a/src/test/java/net/ladenthin/llama/json/ParameterJsonSerializerTest.java b/llama/src/test/java/net/ladenthin/llama/json/ParameterJsonSerializerTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/json/ParameterJsonSerializerTest.java rename to llama/src/test/java/net/ladenthin/llama/json/ParameterJsonSerializerTest.java diff --git a/src/test/java/net/ladenthin/llama/json/RerankResponseParserTest.java b/llama/src/test/java/net/ladenthin/llama/json/RerankResponseParserTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/json/RerankResponseParserTest.java rename to llama/src/test/java/net/ladenthin/llama/json/RerankResponseParserTest.java diff --git a/src/test/java/net/ladenthin/llama/json/TimingsLoggerTest.java b/llama/src/test/java/net/ladenthin/llama/json/TimingsLoggerTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/json/TimingsLoggerTest.java rename to 
llama/src/test/java/net/ladenthin/llama/json/TimingsLoggerTest.java diff --git a/src/test/java/net/ladenthin/llama/loader/LlamaLoaderTest.java b/llama/src/test/java/net/ladenthin/llama/loader/LlamaLoaderTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/loader/LlamaLoaderTest.java rename to llama/src/test/java/net/ladenthin/llama/loader/LlamaLoaderTest.java diff --git a/src/test/java/net/ladenthin/llama/loader/LoggingSmokeTest.java b/llama/src/test/java/net/ladenthin/llama/loader/LoggingSmokeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/loader/LoggingSmokeTest.java rename to llama/src/test/java/net/ladenthin/llama/loader/LoggingSmokeTest.java diff --git a/src/test/java/net/ladenthin/llama/loader/NativeLibraryLoadSmokeTest.java b/llama/src/test/java/net/ladenthin/llama/loader/NativeLibraryLoadSmokeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/loader/NativeLibraryLoadSmokeTest.java rename to llama/src/test/java/net/ladenthin/llama/loader/NativeLibraryLoadSmokeTest.java diff --git a/src/test/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetterTest.java b/llama/src/test/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetterTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetterTest.java rename to llama/src/test/java/net/ladenthin/llama/loader/NativeLibraryPermissionSetterTest.java diff --git a/src/test/java/net/ladenthin/llama/loader/OSInfoTest.java b/llama/src/test/java/net/ladenthin/llama/loader/OSInfoTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/loader/OSInfoTest.java rename to llama/src/test/java/net/ladenthin/llama/loader/OSInfoTest.java diff --git a/src/test/java/net/ladenthin/llama/parameters/ChatRequestTest.java b/llama/src/test/java/net/ladenthin/llama/parameters/ChatRequestTest.java similarity index 100% rename from 
src/test/java/net/ladenthin/llama/parameters/ChatRequestTest.java rename to llama/src/test/java/net/ladenthin/llama/parameters/ChatRequestTest.java diff --git a/src/test/java/net/ladenthin/llama/parameters/InferenceParametersTest.java b/llama/src/test/java/net/ladenthin/llama/parameters/InferenceParametersTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/parameters/InferenceParametersTest.java rename to llama/src/test/java/net/ladenthin/llama/parameters/InferenceParametersTest.java diff --git a/src/test/java/net/ladenthin/llama/parameters/JsonEndpointParametersTest.java b/llama/src/test/java/net/ladenthin/llama/parameters/JsonEndpointParametersTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/parameters/JsonEndpointParametersTest.java rename to llama/src/test/java/net/ladenthin/llama/parameters/JsonEndpointParametersTest.java diff --git a/src/test/java/net/ladenthin/llama/parameters/JsonParametersTest.java b/llama/src/test/java/net/ladenthin/llama/parameters/JsonParametersTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/parameters/JsonParametersTest.java rename to llama/src/test/java/net/ladenthin/llama/parameters/JsonParametersTest.java diff --git a/src/test/java/net/ladenthin/llama/parameters/ModelParametersExtendedTest.java b/llama/src/test/java/net/ladenthin/llama/parameters/ModelParametersExtendedTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/parameters/ModelParametersExtendedTest.java rename to llama/src/test/java/net/ladenthin/llama/parameters/ModelParametersExtendedTest.java diff --git a/src/test/java/net/ladenthin/llama/parameters/ModelParametersTest.java b/llama/src/test/java/net/ladenthin/llama/parameters/ModelParametersTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/parameters/ModelParametersTest.java rename to llama/src/test/java/net/ladenthin/llama/parameters/ModelParametersTest.java diff --git 
a/src/test/java/net/ladenthin/llama/server/AnthropicApiSupportTest.java b/llama/src/test/java/net/ladenthin/llama/server/AnthropicApiSupportTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/AnthropicApiSupportTest.java rename to llama/src/test/java/net/ladenthin/llama/server/AnthropicApiSupportTest.java diff --git a/src/test/java/net/ladenthin/llama/server/AnthropicStreamTranslatorTest.java b/llama/src/test/java/net/ladenthin/llama/server/AnthropicStreamTranslatorTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/AnthropicStreamTranslatorTest.java rename to llama/src/test/java/net/ladenthin/llama/server/AnthropicStreamTranslatorTest.java diff --git a/src/test/java/net/ladenthin/llama/server/NativeServerSmokeTest.java b/llama/src/test/java/net/ladenthin/llama/server/NativeServerSmokeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/NativeServerSmokeTest.java rename to llama/src/test/java/net/ladenthin/llama/server/NativeServerSmokeTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OaiRerankSupportTest.java b/llama/src/test/java/net/ladenthin/llama/server/OaiRerankSupportTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OaiRerankSupportTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OaiRerankSupportTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OllamaApiSupportTest.java b/llama/src/test/java/net/ladenthin/llama/server/OllamaApiSupportTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OllamaApiSupportTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OllamaApiSupportTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiCompatServerHttpTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiCompatServerHttpTest.java similarity index 100% rename from 
src/test/java/net/ladenthin/llama/server/OpenAiCompatServerHttpTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiCompatServerHttpTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiCompatServerIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiCompatServerIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiCompatServerIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiCompatServerIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiRequestMapperTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiRequestMapperTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiRequestMapperTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiRequestMapperTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiServerCliTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerCliTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerCliTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerCliTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiServerCompletionIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerCompletionIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerCompletionIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerCompletionIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiServerConfigTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerConfigTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerConfigTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerConfigTest.java diff 
--git a/src/test/java/net/ladenthin/llama/server/OpenAiServerEmbeddingsIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerEmbeddingsIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerEmbeddingsIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerEmbeddingsIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiServerRerankIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerRerankIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerRerankIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerRerankIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiServerTestSupport.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerTestSupport.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerTestSupport.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerTestSupport.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiServerToolCallingIntegrationTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiServerToolCallingIntegrationTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiServerToolCallingIntegrationTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiServerToolCallingIntegrationTest.java diff --git a/src/test/java/net/ladenthin/llama/server/OpenAiSseFormatterTest.java b/llama/src/test/java/net/ladenthin/llama/server/OpenAiSseFormatterTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/OpenAiSseFormatterTest.java rename to llama/src/test/java/net/ladenthin/llama/server/OpenAiSseFormatterTest.java diff --git a/src/test/java/net/ladenthin/llama/server/ResponsesApiSupportTest.java 
b/llama/src/test/java/net/ladenthin/llama/server/ResponsesApiSupportTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/ResponsesApiSupportTest.java rename to llama/src/test/java/net/ladenthin/llama/server/ResponsesApiSupportTest.java diff --git a/src/test/java/net/ladenthin/llama/server/ResponsesStreamTranslatorTest.java b/llama/src/test/java/net/ladenthin/llama/server/ResponsesStreamTranslatorTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/ResponsesStreamTranslatorTest.java rename to llama/src/test/java/net/ladenthin/llama/server/ResponsesStreamTranslatorTest.java diff --git a/src/test/java/net/ladenthin/llama/server/ToolCallDeltaAccumulatorTest.java b/llama/src/test/java/net/ladenthin/llama/server/ToolCallDeltaAccumulatorTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/server/ToolCallDeltaAccumulatorTest.java rename to llama/src/test/java/net/ladenthin/llama/server/ToolCallDeltaAccumulatorTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ChatChoiceTest.java b/llama/src/test/java/net/ladenthin/llama/value/ChatChoiceTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ChatChoiceTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ChatChoiceTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ChatMessageTest.java b/llama/src/test/java/net/ladenthin/llama/value/ChatMessageTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ChatMessageTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ChatMessageTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ChatResponseTest.java b/llama/src/test/java/net/ladenthin/llama/value/ChatResponseTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ChatResponseTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ChatResponseTest.java diff --git 
a/src/test/java/net/ladenthin/llama/value/ChatTranscriptTest.java b/llama/src/test/java/net/ladenthin/llama/value/ChatTranscriptTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ChatTranscriptTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ChatTranscriptTest.java diff --git a/src/test/java/net/ladenthin/llama/value/CompletionResultTest.java b/llama/src/test/java/net/ladenthin/llama/value/CompletionResultTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/CompletionResultTest.java rename to llama/src/test/java/net/ladenthin/llama/value/CompletionResultTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ContentPartTest.java b/llama/src/test/java/net/ladenthin/llama/value/ContentPartTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ContentPartTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ContentPartTest.java diff --git a/src/test/java/net/ladenthin/llama/value/LlamaOutputTest.java b/llama/src/test/java/net/ladenthin/llama/value/LlamaOutputTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/LlamaOutputTest.java rename to llama/src/test/java/net/ladenthin/llama/value/LlamaOutputTest.java diff --git a/src/test/java/net/ladenthin/llama/value/LogLevelTest.java b/llama/src/test/java/net/ladenthin/llama/value/LogLevelTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/LogLevelTest.java rename to llama/src/test/java/net/ladenthin/llama/value/LogLevelTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ModelMetaTest.java b/llama/src/test/java/net/ladenthin/llama/value/ModelMetaTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ModelMetaTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ModelMetaTest.java diff --git a/src/test/java/net/ladenthin/llama/value/PairTest.java 
b/llama/src/test/java/net/ladenthin/llama/value/PairTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/PairTest.java rename to llama/src/test/java/net/ladenthin/llama/value/PairTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ServerMetricsTest.java b/llama/src/test/java/net/ladenthin/llama/value/ServerMetricsTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ServerMetricsTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ServerMetricsTest.java diff --git a/src/test/java/net/ladenthin/llama/value/StopReasonTest.java b/llama/src/test/java/net/ladenthin/llama/value/StopReasonTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/StopReasonTest.java rename to llama/src/test/java/net/ladenthin/llama/value/StopReasonTest.java diff --git a/src/test/java/net/ladenthin/llama/value/TimingsTest.java b/llama/src/test/java/net/ladenthin/llama/value/TimingsTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/TimingsTest.java rename to llama/src/test/java/net/ladenthin/llama/value/TimingsTest.java diff --git a/src/test/java/net/ladenthin/llama/value/TokenLogprobTest.java b/llama/src/test/java/net/ladenthin/llama/value/TokenLogprobTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/TokenLogprobTest.java rename to llama/src/test/java/net/ladenthin/llama/value/TokenLogprobTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ToolCallTest.java b/llama/src/test/java/net/ladenthin/llama/value/ToolCallTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/ToolCallTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ToolCallTest.java diff --git a/src/test/java/net/ladenthin/llama/value/ToolDefinitionTest.java b/llama/src/test/java/net/ladenthin/llama/value/ToolDefinitionTest.java similarity index 100% rename from 
src/test/java/net/ladenthin/llama/value/ToolDefinitionTest.java rename to llama/src/test/java/net/ladenthin/llama/value/ToolDefinitionTest.java diff --git a/src/test/java/net/ladenthin/llama/value/UsageTest.java b/llama/src/test/java/net/ladenthin/llama/value/UsageTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/value/UsageTest.java rename to llama/src/test/java/net/ladenthin/llama/value/UsageTest.java diff --git a/src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java b/llama/src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java rename to llama/src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java diff --git a/src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java b/llama/src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java similarity index 100% rename from src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java rename to llama/src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java diff --git a/src/test/resources/images/README.md b/llama/src/test/resources/images/README.md similarity index 100% rename from src/test/resources/images/README.md rename to llama/src/test/resources/images/README.md diff --git a/src/test/resources/images/test-image.jpg b/llama/src/test/resources/images/test-image.jpg similarity index 100% rename from src/test/resources/images/test-image.jpg rename to llama/src/test/resources/images/test-image.jpg diff --git a/pom.xml b/pom.xml index d8c1492c..c73ba70d 100644 --- a/pom.xml +++ b/pom.xml @@ -10,14 +10,21 @@ SPDX-License-Identifier: MIT xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 + net.ladenthin - llama + llama-parent 5.0.4-SNAPSHOT - jar + pom ${project.groupId}:${project.artifactId} - Java Bindings for 
llama.cpp - A Port of Facebook's LLaMA model - in C/C++. + Aggregator POM for java-llama.cpp: the native JNI binding + (net.ladenthin:llama) and its sibling integrations. https://github.com/bernardladenthin/java-llama.cpp @@ -48,671 +55,20 @@ SPDX-License-Identifier: MIT - - bernardladenthin - 5.19.0 - 1.0.0 - 1.18.46 - 2.50.0 - 0.13.7 - 4.2.0 - 2.22.0 - 3.8.6 - 2.0.18 - 1.5.37 - 1.27 - 6.1.1 - 3.0 - 1.37 - 0.16 - 3.6 - 2.12.6 - 1.2.28 - - 1.9.3 - 1.4.2 - 4.10.2.0 - 7.7.4 - 1.14.0 - 3.7.0 - 2.94.0 - UTF-8 - ${git.commit.time} - + + llama + llama-langchain4j + - - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - - - - org.projectlombok - lombok - ${lombok.version} - provided - - - org.junit.jupiter - junit-jupiter - ${junit.version} - test - - - org.hamcrest - hamcrest - ${hamcrest.version} - test - - - net.jqwik - jqwik - ${jqwik.version} - test - - - com.tngtech.archunit - archunit-junit5 - ${archunit.version} - test - - - org.jspecify - jspecify - ${jspecify.version} - - - org.checkerframework - checker-qual - ${checker.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - ch.qos.logback - logback-classic - ${logback.version} - runtime - - - - org.codehaus.mojo - animal-sniffer-annotations - ${animal-sniffer.version} - provided - - - org.openjdk.jmh - jmh-core - ${jmh.version} - test - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - test - - - org.openjdk.jcstress - jcstress-core - ${jcstress.version} - test - - - org.jetbrains.lincheck - lincheck - ${lincheck.version} - test - - - - io.github.hakky54 - logcaptor - ${logcaptor.version} - test - - - - io.projectreactor - reactor-core - ${reactor.version} - test - - - io.projectreactor - reactor-test - ${reactor.version} - test - + - - com.vmlens - api - ${vmlens.version} - test - - - - - - - - com.diffplug.spotless - spotless-maven-plugin - ${spotless.version} - - - com.github.spotbugs - 
spotbugs-maven-plugin - ${spotbugs.version} - - - com.vmlens - vmlens-maven-plugin - ${vmlens.version} - - - io.github.git-commit-id - git-commit-id-maven-plugin - 10.0.0 - - - org.apache.maven.plugins - maven-assembly-plugin - 3.8.0 - - - org.apache.maven.plugins - maven-compiler-plugin - 3.15.0 - - - org.apache.maven.plugins - maven-gpg-plugin - 3.2.8 - - - org.apache.maven.plugins - maven-jar-plugin - 3.5.0 - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.12.0 - - - org.apache.maven.plugins - maven-resources-plugin - 3.5.0 - - - org.apache.maven.plugins - maven-source-plugin - 3.4.0 - - - org.apache.maven.plugins - maven-surefire-plugin - 3.5.6 - - - - **/vmlens/*.java - - - - - org.apache.maven.plugins - maven-enforcer-plugin - 3.6.3 - - - org.codehaus.mojo - exec-maven-plugin - 3.6.3 - - - org.jacoco - jacoco-maven-plugin - 0.8.15 - - - org.pitest - pitest-maven - 1.25.5 - - - org.sonatype.central - central-publishing-maven-plugin - 0.11.0 - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - - - enforce - - enforce - - - - - [3.6.3,) - - - [1.8,) - - - - - - commons-logging:commons-logging - - log4j:log4j - - org.hamcrest:hamcrest-core - org.hamcrest:hamcrest-library - org.hamcrest:hamcrest-all - - junit:junit - junit:junit-dep - - - - - - - - - io.github.git-commit-id - git-commit-id-maven-plugin - - - get-git-properties - - revision - - initialize - - - - yyyy-MM-dd'T'HH:mm:ss'Z' - UTC - false - false - - - - org.apache.maven.plugins - maven-compiler-plugin - - 8 - 21 - true - true - - - -Xlint:all,-serial,-options,-classfile,-processing - -Werror - - - -processor - lombok.launch.AnnotationProcessorHider$AnnotationProcessor,lombok.launch.AnnotationProcessorHider$ClaimingProcessor,org.checkerframework.checker.nullness.NullnessChecker - -XDaddTypeAnnotationsToSymbol=true - -XDcompilePolicy=simple - --should-stop=ifError=FLOW - -Xplugin:ErrorProne -Xep:NullAway:ERROR -XepOpt:NullAway:OnlyNullMarked=true -XepOpt:NullAway:JSpecifyMode=true 
-XepOpt:NullAway:CheckOptionalEmptiness=true -XepOpt:NullAway:AcknowledgeRestrictiveAnnotations=true -XepOpt:NullAway:AcknowledgeAndroidRecent=true -XepOpt:NullAway:AssertsEnabled=true -Xep:BoxedPrimitiveEquality:ERROR -Xep:EqualsHashCode:ERROR -Xep:EqualsIncompatibleType:ERROR -Xep:IdentityBinaryExpression:ERROR -Xep:SelfAssignment:ERROR -Xep:SelfComparison:ERROR -Xep:SelfEquals:ERROR -Xep:DeadException:ERROR -Xep:FormatString:ERROR -Xep:InvalidPatternSyntax:ERROR -Xep:OptionalEquality:ERROR -Xep:ImpossibleNullComparison:ERROR - - - - org.projectlombok - lombok - ${lombok.version} - - - com.google.errorprone - error_prone_core - ${errorprone.version} - - - com.uber.nullaway - nullaway - ${nullaway.version} - - - org.checkerframework - checker - ${checker.version} - - - - - - default-compile - - - - module-info.java - - - - - module-info-compile - compile - - compile - - - - 9 - - module-info.java - - - - - - - default-testCompile - - - false - - -XDaddTypeAnnotationsToSymbol=true - -XDcompilePolicy=simple - --should-stop=ifError=FLOW - -Xplugin:ErrorProne -Xep:NullAway:OFF -Xep:GuardedBy:OFF - - - - org.openjdk.jcstress - jcstress-core - ${jcstress.version} - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - - - - - - - - maven-resources-plugin - - - org.apache.maven.plugins - maven-source-plugin - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - 1.8 - true - true - all - - - - attach-javadocs - - jar - - - - - - org.jacoco - jacoco-maven-plugin - - - prepare-agent - - prepare-agent - - - - report - test - - report - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - @{argLine} -Xmx2g -XX:ErrorFile=hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=. 
- - true - - - - com.diffplug.spotless - spotless-maven-plugin - - - - src/main/java/**/*.java - src/test/java/**/*.java - - - ${palantir-java-format.version} - - - - - - - - - spotless-check - verify - - check - - - - - - com.github.spotbugs - spotbugs-maven-plugin - - Max - Low - true - false - spotbugs-exclude.xml - - - com.mebigfatguy.fb-contrib - fb-contrib - ${fb-contrib.version} - - - com.h3xstream.findsecbugs - findsecbugs-plugin - ${findsecbugs.version} - - - - - - spotbugs-check - verify - - check - - - - - - org.codehaus.mojo - exec-maven-plugin - - org.openjdk.jmh.Main - test - - - - - org.pitest - pitest-maven - - - org.pitest - pitest-junit5-plugin - 1.2.3 - - - - - net.ladenthin.llama.value.* - net.ladenthin.llama.exception.* - net.ladenthin.llama.args.* - net.ladenthin.llama.json.TimingsLogger - net.ladenthin.llama.json.RerankResponseParser - net.ladenthin.llama.json.ChatResponseParser - net.ladenthin.llama.json.CompletionResponseParser - - - net.ladenthin.llama.value.* - net.ladenthin.llama.exception.* - net.ladenthin.llama.args.* - net.ladenthin.llama.json.* - - 100 - 30000 - - - - - - release @@ -720,6 +76,7 @@ SPDX-License-Identifier: MIT org.apache.maven.plugins maven-gpg-plugin + 3.2.8 sign-artifacts @@ -739,6 +96,7 @@ SPDX-License-Identifier: MIT org.sonatype.central central-publishing-maven-plugin + 0.11.0 true central @@ -749,601 +107,5 @@ SPDX-License-Identifier: MIT - - - cuda - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - gpu - compile - - compile - - - - - module-info.java - - - -h - src/main/cpp - - - ${project.build.outputDirectory}_cuda - - - - - - maven-resources-plugin - - - - copy-resources - process-classes - - copy-resources - - - - ${project.build.outputDirectory}_cuda - - - - ${basedir}/src/main/resources_linux_cuda/ - - **/*.* - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - cuda - package - - jar - - - cuda13-linux-x86-64 - - ${project.build.outputDirectory}_cuda - - - - - - - - - - 
opencl-android - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - opencl-android - compile - - compile - - - - - module-info.java - - - -h - src/main/cpp - - - ${project.build.outputDirectory}_opencl_android - - - - - - maven-resources-plugin - - - - copy-resources-opencl-android - process-classes - - copy-resources - - - - ${project.build.outputDirectory}_opencl_android - - - - ${basedir}/src/main/resources_android_opencl/ - - **/*.* - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - opencl-android - package - - jar - - - opencl-android-aarch64 - - ${project.build.outputDirectory}_opencl_android - - - - - - - - - - windows-msvc - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - windows-msvc - compile - - compile - - - - - module-info.java - - - -h - src/main/cpp - - - ${project.build.outputDirectory}_windows_msvc - - - - - - maven-resources-plugin - - - - copy-resources-windows-msvc - process-classes - - copy-resources - - - - ${project.build.outputDirectory}_windows_msvc - - - - ${basedir}/src/main/resources_windows_msvc/ - - **/*.* - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - - windows-msvc - package - - jar - - - msvc-windows - - ${project.build.outputDirectory}_windows_msvc - - - - - - - - - - - cuda-windows - - - - org.apache.maven.plugins - maven-compiler-plugin - - - cuda-windows - compile - - compile - - - - module-info.java - - - -h - src/main/cpp - - - ${project.build.outputDirectory}_windows_cuda - - - - - - maven-resources-plugin - - - copy-resources-cuda-windows - process-classes - - copy-resources - - - - ${project.build.outputDirectory}_windows_cuda - - - - ${basedir}/src/main/resources_windows_cuda/ - - **/*.* - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - cuda-windows - package - - jar - - - cuda13-windows-x86-64 - - ${project.build.outputDirectory}_windows_cuda - - - - - - - - - - - vulkan-windows - - - - org.apache.maven.plugins - maven-compiler-plugin - - 
- vulkan-windows - compile - - compile - - - - module-info.java - - - -h - src/main/cpp - - - ${project.build.outputDirectory}_windows_vulkan - - - - - - maven-resources-plugin - - - copy-resources-vulkan-windows - process-classes - - copy-resources - - - - ${project.build.outputDirectory}_windows_vulkan - - - - ${basedir}/src/main/resources_windows_vulkan/ - - **/*.* - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - vulkan-windows - package - - jar - - - vulkan-windows-x86-64 - - ${project.build.outputDirectory}_windows_vulkan - - - - - - - - - - - opencl-windows - - - - org.apache.maven.plugins - maven-compiler-plugin - - - opencl-windows - compile - - compile - - - - module-info.java - - - -h - src/main/cpp - - - ${project.build.outputDirectory}_windows_opencl - - - - - - maven-resources-plugin - - - copy-resources-opencl-windows - process-classes - - copy-resources - - - - ${project.build.outputDirectory}_windows_opencl - - - - ${basedir}/src/main/resources_windows_opencl/ - - **/*.* - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - opencl-windows - package - - jar - - - opencl-windows-x86-64 - - ${project.build.outputDirectory}_windows_opencl - - - - - - - - - - vmlens - - - - com.vmlens - vmlens-maven-plugin - - - - **/vmlens/*.java - - - - - vmlens-test - - test - - - - - - - - - jcstress - - - - org.codehaus.mojo - exec-maven-plugin - - - jcstress - test - exec - - ${java.home}/bin/java - test - - -classpath - - org.openjdk.jcstress.Main - -v - -m - default - - - - - - - - - - - assembly - - - - org.apache.maven.plugins - maven-assembly-plugin - - - jar-with-dependencies - - - - net.ladenthin.llama.server.OpenAiCompatServer - - - - - - build-fat-jar - package - - single - - - - - - -