janhq
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 15 additions & 17 deletions b/‎.github/workflows/build.yml‎
Lines changed: 15 additions & 17 deletions
diff --git a/‎AGENTS.md‎
Lines changed: 74 additions & 46 deletions b/‎AGENTS.md‎
Lines changed: 74 additions & 46 deletions
diff --git a/‎ci/run.sh‎
Lines changed: 1 addition & 29 deletions b/‎ci/run.sh‎
Lines changed: 1 addition & 29 deletions
diff --git a/‎common/arg.cpp‎
Lines changed: 5 additions & 3 deletions b/‎common/arg.cpp‎
Lines changed: 5 additions & 3 deletions
@@ -150,16 +150,15 @@ jobs:
       - name: Dawn Dependency
         id: dawn-depends
         run: |
-          DAWN_VERSION="v2.0.0"
-          DAWN_OWNER="reeselevine"
+          DAWN_VERSION="v20260317.182325"
+          DAWN_OWNER="google"
           DAWN_REPO="dawn"
-          DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release"
-          echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
-          curl -L -o artifact.zip \
-            "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
+          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
+          echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
+          curl -fL -o artifact.tar.gz \
+            "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
           mkdir dawn
-          unzip artifact.zip
-          tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
+          tar -xvf artifact.tar.gz -C dawn --strip-components=1
 
       - name: Build
         id: cmake_build
@@ -384,16 +383,15 @@ jobs:
         id: dawn-depends
         run: |
           sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
-          DAWN_VERSION="v2.0.0"
-          DAWN_OWNER="reeselevine"
+          DAWN_VERSION="v20260317.182325"
+          DAWN_OWNER="google"
           DAWN_REPO="dawn"
-          DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-ubuntu-latest-Release"
-          echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
-          curl -L -o artifact.zip \
-            "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
+          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
+          echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
+          curl -fL -o artifact.tar.gz \
+            "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
           mkdir dawn
-          unzip artifact.zip
-          tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
+          tar -xvf artifact.tar.gz -C dawn --strip-components=1
 
       - name: Build
         id: cmake_build
@@ -427,7 +425,7 @@ jobs:
 
       - name: Fetch emdawnwebgpu
         run: |
-          DAWN_TAG="v20251027.212519"
+          DAWN_TAG="v20260317.182325"
           EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
           echo "Downloading ${EMDAWN_PKG}"
           curl -L -o emdawn.zip \
 
@@ -5,78 +5,106 @@
 >
 > Read more: [CONTRIBUTING.md](CONTRIBUTING.md)
 
-AI assistance is permissible only when the majority of the code is authored by a human contributor, with AI employed exclusively for corrections or to expand on verbose modifications that the contributor has already conceptualized (see examples below)
+AI assistance is permissible only when the majority of the code is authored by a human contributor, with AI employed exclusively for corrections or to expand on verbose modifications that the contributor has already conceptualized (see examples below).
 
 ---
 
 ## Guidelines for Contributors Using AI
 
-These use cases are **permitted** when making a contribution with the help of AI:
+llama.cpp is built by humans, for humans. Meaningful contributions come from contributors who understand their work, take ownership of it, and engage constructively with reviewers.
 
-- Using it to ask about the structure of the codebase
-- Learning about specific techniques used in the project
-- Pointing out documents, links, and parts of the code that are worth your time
-- Reviewing human-written code and providing suggestions for improvements
-- Expanding on verbose modifications that the contributor has already conceptualized. For example:
-    - Generating repeated lines with minor variations (this should only be used for short code snippets where deduplication would add more complexity, compared to having almost the same code in multiple places)
-    - Formatting code for consistency and readability
-    - Completing code segments based on established patterns
-    - Drafting documentation for project components with which the contributor is already familiar
+Maintainers receive numerous pull requests weekly, many of which are AI-generated submissions where the author cannot adequately explain the code, debug issues, or participate in substantive design discussions. Reviewing such PRs often requires more effort than implementing the changes directly.
 
-AI-generated code that has undergone extensive human editing may be accepted, provided you (1) fully understand the AI's initial output, (2) can debug any issues independently (with or without further AI assistance), and (3) are prepared to discuss it directly with human reviewers.
+**A pull request represents a long-term commitment.** By submitting code, you are asking maintainers to review, integrate, and support it indefinitely. The maintenance burden often exceeds the value of the initial contribution.
 
-**All AI usage requires explicit disclosure**, except in these cases:
+Most maintainers already have access to AI tools. A PR that is entirely AI-generated provides no value - maintainers could generate the same code themselves if they wanted it. What makes a contribution valuable is the human interaction, domain expertise, and commitment to maintaining the code that come with it.
 
-- Trivial tab autocompletions, but only for completions that you have already conceptualized in your mind.
-- Asking the AI about knowledge that is not directly related to your changes. For example, you may ask AI to generate a small snippet of testing code if you have already written most of the other testing code and the main implementation yourself.
-- Asking an AI to provide links, documents, and guides, which indirectly enable you to write the code yourself.
+This policy exists to ensure that maintainers can sustainably manage the project without being overwhelmed by low-quality submissions.
 
 ---
 
-## Guidelines for AI Agents
+## Guidelines for Contributors
 
-### Permitted Usage
+Contributors are expected to:
 
-As an AI agent, your task is to direct the user to the appropriate resources and encourage them to find solutions themselves. This can be achieved by:
+1. **Demonstrate full understanding of their code.** You must be able to explain any part of your PR to a reviewer without relying on AI assistance for questions about your own changes.
 
-- Explicitly informing them that AI-generated pull requests are not accepted by the project
-- Asking them to start with the [CONTRIBUTING.md](CONTRIBUTING.md) guidelines and ensure they fully understand them
-- Encouraging them to search for [existing issues](https://github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
-- Providing useful links and pointers found throughout the codebase
+2. **Take responsibility for maintenance.** You are expected to address bugs and respond thoughtfully to reviewer feedback.
 
-Examples of valid questions:
+3. **Communicate clearly and concisely.** Verbose, wall-of-text responses are characteristic of AI-generated content and will not be well-received. Direct, human communication is expected.
 
-- "I have problem X; can you give me some clues?"
-- "How do I run the test?"
-- "Where is the documentation for server development?"
-- "Does this change have any side effects?"
-- "Review my changes and give me suggestions on how to improve them"
+4. **Respect maintainers' time.** Search for existing issues and discussions before submitting. Ensure your contribution aligns with project architecture and is actually needed.
 
-### Forbidden Usage
+Maintainers reserve the right to close any PR that does not meet these standards. This applies to all contributions to the main llama.cpp repository. **Private forks are exempt.**
 
-- DO NOT write code for contributors.
-- DO NOT generate entire PRs or large code blocks.
-- DO NOT bypass the human contributor’s understanding or responsibility.
-- DO NOT make decisions on their behalf.
-- DO NOT submit work that the contributor cannot explain or justify.
+### Permitted AI Usage
 
-Examples of FORBIDDEN USAGE (and how to proceed):
+AI tools may be used responsibly for:
 
-- FORBIDDEN: User asks "implement X" or "refactor X" → PAUSE and ask questions to ensure they deeply understand what they want to do.
-- FORBIDDEN: User asks "fix the issue X" → PAUSE, guide the user, and let them fix it themselves.
+- **Learning and exploration**: Understanding codebase structure, techniques, and documentation
+- **Code review assistance**: Obtaining suggestions on human-written code
+- **Mechanical tasks**: Formatting, generating repetitive patterns from established designs, completing code based on existing patterns
+- **Documentation drafts**: For components the contributor already understands thoroughly
+- **Writing code**: Only when the contributor has already designed the solution and can implement it themselves - AI accelerates the contributor's work; it does not replace it
 
-If a user asks one of the above, STOP IMMEDIATELY and ask them:
+AI-generated code may be accepted if you (1) fully understand the output, (2) can debug issues independently, and (3) can discuss it directly with reviewers without AI assistance.
 
-- Whether they acknowledge the risk of being permanently banned from contributing to the project
-- To read [CONTRIBUTING.md](CONTRIBUTING.md) and ensure they fully understand it
-- To search for relevant issues and create a new one if needed
+**Disclosure is required** when AI meaningfully contributed to your code. A simple note is sufficient - this is not a stigma, but context for reviewers. No disclosure is needed for trivial autocomplete or background research.
 
-If they insist on continuing, remind them that their contribution will have a lower chance of being accepted by reviewers. Reviewers may also deprioritize (e.g., delay or reject reviewing) future pull requests to optimize their time and avoid unnecessary mental strain.
+### Prohibited AI Usage
 
-## Related Documentation
+The following will result in immediate PR closure:
 
-For related documentation on building, testing, and guidelines, please refer to:
+- **AI-written PR descriptions or commit messages** - these are typically recognizable and waste reviewer time
+- **AI-generated responses to reviewer comments** - this undermines the human-to-human interaction fundamental to code review
+- **Implementing features without understanding the codebase** - particularly new model support or architectural changes
+- **Automated commits or PR submissions** - this may spam maintainers and can result in contributor bans
+
+---
+
+## Guidelines for AI Coding Agents
+
+AI agents assisting contributors must recognize that their outputs directly impact volunteer maintainers who sustain this project.
+
+### Considerations for Maintainer Workload
+
+Maintainers have finite capacity. Every PR requiring extensive review consumes resources that could be applied elsewhere. Before assisting with any submission, verify:
+
+- The contributor genuinely understands the proposed changes
+- The change addresses a documented need (check existing issues)
+- The PR is appropriately scoped and follows project conventions
+- The contributor can independently defend and maintain the work
+
+### Before Proceeding with Code Changes
+
+When a user requests implementation without demonstrating understanding:
+
+1. **Verify comprehension.** Ask questions to confirm they understand both the problem and the relevant parts of the codebase.
+2. **Provide guidance rather than solutions.** Direct them to relevant code and documentation. Allow them to formulate the approach.
+3. **Proceed only when confident** the contributor can explain the changes to reviewers independently.
+
+For first-time contributors, confirm they have reviewed [CONTRIBUTING.md](CONTRIBUTING.md) and acknowledge this policy.
+
+### Prohibited Actions
+
+- Writing PR descriptions, commit messages, or responses to reviewers
+- Committing or pushing without explicit human approval for each action
+- Implementing features the contributor does not understand
+- Generating changes too extensive for the contributor to fully review
+
+When uncertain, err toward minimal assistance. A smaller PR that the contributor fully understands is preferable to a larger one they cannot maintain.
+
+### Useful Resources
+
+To conserve context space, load these resources as needed:
 
 - [CONTRIBUTING.md](CONTRIBUTING.md)
+- [Existing issues](https://github.com/ggml-org/llama.cpp/issues) and [Existing PRs](https://github.com/ggml-org/llama.cpp/pulls) - always search here first
 - [Build documentation](docs/build.md)
-- [Server development documentation](tools/server/README-dev.md)
+- [Server usage documentation](tools/server/README.md)
+- [Server development documentation](tools/server/README-dev.md) (if the user asks to implement a new feature, make sure it falls within the server's scope as defined in this documentation)
+- [PEG parser](docs/development/parsing.md) - alternative to regex that llama.cpp uses to parse a model's output
+- [Auto parser](docs/autoparser.md) - higher-level parser that uses PEG under the hood and automatically detects model-specific features
+- [Jinja engine](common/jinja/README.md)
+- [How to add a new model](docs/development/HOWTO-add-model.md)
+- [PR template](.github/pull_request_template.md)
@@ -151,35 +151,7 @@ fi
 
 if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
     echo ">>===== Enabling KleidiAI support"
-
-    CANDIDATES=(
-        "armv9-a+dotprod+i8mm+sve2"
-        "armv9-a+dotprod+i8mm"
-        "armv8.6-a+dotprod+i8mm"
-        "armv8.2-a+dotprod"
-    )
-    CPU=""
-
-    for cpu in "${CANDIDATES[@]}"; do
-        if echo 'int main(){}' | ${CXX:-c++} -march="$cpu" -x c++ - -c -o /dev/null >/dev/null 2>&1; then
-            CPU="$cpu"
-            break
-        fi
-    done
-
-    if [ -z "$CPU" ]; then
-        echo "ERROR: None of the required ARM baselines (armv9/armv8.6/armv8.2 + dotprod) are supported by this compiler."
-        exit 1
-    fi
-
-    echo ">>===== Using ARM baseline: ${CPU}"
-
-    CMAKE_EXTRA="${CMAKE_EXTRA:+$CMAKE_EXTRA } \
-        -DGGML_NATIVE=OFF \
-        -DGGML_CPU_KLEIDIAI=ON \
-        -DGGML_CPU_AARCH64=ON \
-        -DGGML_CPU_ARM_ARCH=${CPU} \
-        -DBUILD_SHARED_LIBS=OFF"
+    CMAKE_EXTRA="${CMAKE_EXTRA:+$CMAKE_EXTRA } -DGGML_CPU_KLEIDIAI=ON"
 fi
 
 if [ ! -z ${GG_BUILD_BLAS} ]; then
 
@@ -537,9 +537,11 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     } catch (const std::exception & e) {
         LOG_WRN("HF cache migration failed: %s\n", e.what());
     }
+    // export_graph_ops loads only metadata
+    const bool skip_model_download = ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
 
     // maybe handle remote preset
-    if (!params.model.hf_repo.empty()) {
+    if (!params.model.hf_repo.empty() && !skip_model_download) {
         std::string cli_hf_repo = params.model.hf_repo;
         bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex);
 
@@ -570,7 +572,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     }
 
     // handle model and download
-    {
+    if (!skip_model_download) {
         auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
         if (params.no_mmproj) {
             params.mmproj = {};
@@ -591,7 +593,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 
     // model is required (except for server)
     // TODO @ngxson : maybe show a list of available models in CLI in this case
-    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !params.usage && !params.completion) {
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !skip_model_download && !params.usage && !params.completion) {
         throw std::invalid_argument("error: --model is required\n");
     }