Merge remote-tracking branch 'upstream/master' into parakeet-support

danbev · danbev · commit a8cc3fab6b7c · 2026-05-29T08:03:09.000+02:00
diff --git a/.github/workflows/bindings-go.yml b/.github/workflows/bindings-go.yml
@@ -3,11 +3,11 @@ on:
   push:
     paths:
       - bindings/go/**
-      - whisper.h
+      - include/whisper.h
   pull_request:
     paths:
       - bindings/go/**
-      - whisper.h
+      - include/whisper.h
 
 jobs:
   ubuntu-22:
diff --git a/.github/workflows/bindings-ruby.yml b/.github/workflows/bindings-ruby.yml
@@ -4,8 +4,19 @@ on:
   push:
     branches:
       - master
+    paths:
+      - bindings/ruby/**
+      - include/whisper.h
+      - examples/common-whisper.h
+      - ggml/include/ggml.h
+
   pull_request:
     types: [opened, synchronize, reopened]
+    paths:
+      - bindings/ruby/**
+      - include/whisper.h
+      - examples/common-whisper.h
+      - ggml/include/ggml.h
 
 jobs:
   ubuntu-22:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -28,6 +28,9 @@ on:
 
   pull_request:
     types: [opened, synchronize, reopened]
+    paths-ignore:
+      - 'bindings/ruby/**' # handled by bindings-ruby.yml
+      - 'bindings/go/**'   # handled by bindings-go.yml
   workflow_dispatch:
     inputs:
       create_release:
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -1,38 +1,33 @@
 name: Publish Docker image
 
 on:
-  pull_request:
   push:
     branches:
       - master
 
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    if: github.event.pull_request.draft == false
 
-    runs-on: ubuntu-22.04
+    runs-on: ${{ matrix.config.runs_on }}
     env:
       COMMIT_SHA: ${{ github.sha }}
     strategy:
       fail-fast: false
       matrix:
         config:
-          - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-vulkan", dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main",              dockerfile: ".devops/main.Dockerfile",        platform: "linux/amd64", runs_on: "ubuntu-24.04"     }
+          - { tag: "main-arm64",        dockerfile: ".devops/main.Dockerfile",        platform: "linux/arm64", runs_on: "ubuntu-24.04-arm" }
+          - { tag: "main-musa",         dockerfile: ".devops/main-musa.Dockerfile",   platform: "linux/amd64", runs_on: "ubuntu-24.04"     }
+          - { tag: "main-intel",        dockerfile: ".devops/main-intel.Dockerfile",  platform: "linux/amd64", runs_on: "ubuntu-24.04"     }
+          - { tag: "main-cuda",         dockerfile: ".devops/main-cuda.Dockerfile",   platform: "linux/amd64", runs_on: "ubuntu-24.04"     }
+          - { tag: "main-vulkan",       dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/amd64", runs_on: "ubuntu-24.04"     }
+          - { tag: "main-vulkan-arm64", dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/arm64", runs_on: "ubuntu-24.04-arm" }
 
     steps:
       - name: Check out the repo
         uses: actions/checkout@v6
 
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-        with:
-          image: tonistiigi/binfmt:qemu-v7.0.0-28
-
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,102 @@
+# Instructions for whisper.cpp
+
+> [!IMPORTANT]
+> This project does **not** accept pull requests that are fully or predominantly AI-generated. AI tools may be utilized solely in an assistive capacity.
+>
+> Read more: [CONTRIBUTING.md](CONTRIBUTING.md)
+
+AI assistance is permissible only when the majority of the code is authored by a human contributor, with AI employed exclusively for corrections or to expand on verbose modifications that the contributor has already conceptualized (see examples below).
+
+---
+
+## Guidelines for Contributors Using AI
+
+whisper.cpp is built by humans, for humans. Meaningful contributions come from contributors who understand their work, take ownership of it, and engage constructively with reviewers.
+
+Maintainers receive numerous pull requests weekly, many of which are AI-generated submissions where the author cannot adequately explain the code, debug issues, or participate in substantive design discussions. Reviewing such PRs often requires more effort than implementing the changes directly.
+
+**A pull request represents a long-term commitment.** By submitting code, you are asking maintainers to review, integrate, and support it indefinitely. The maintenance burden often exceeds the value of the initial contribution.
+
+Most maintainers already have access to AI tools. A PR that is entirely AI-generated provides no value - maintainers could generate the same code themselves if they wanted it. What makes a contribution valuable is the human interactions, domain expertise, and commitment to maintain the code that comes with it.
+
+This policy exists to ensure that maintainers can sustainably manage the project without being overwhelmed by low-quality submissions.
+
+---
+
+## Guidelines for Contributors
+
+Contributors are expected to:
+
+1. **Demonstrate full understanding of their code.** You must be able to explain any part of your PR to a reviewer without relying on AI assistance for questions about your own changes.
+
+2. **Take responsibility for maintenance.** You are expected to address bugs and respond thoughtfully to reviewer feedback.
+
+3. **Communicate clearly and concisely.** Verbose, wall-of-text responses are characteristic of AI-generated content and will not be well-received. Direct, human communication is expected.
+
+4. **Respect maintainers' time.** Search for existing issues and discussions before submitting. Ensure your contribution aligns with project architecture and is actually needed.
+
+Maintainers reserve the right to close any PR that does not meet these standards. This applies to all contributions to the main whisper.cpp repository. **Private forks are exempt.**
+
+### Permitted AI Usage
+
+AI tools may be used responsibly for:
+
+- **Learning and exploration**: Understanding codebase structure, techniques, and documentation
+- **Code review assistance**: Obtaining suggestions on human-written code
+- **Mechanical tasks**: Formatting, generating repetitive patterns from established designs, completing code based on existing patterns
+- **Documentation drafts**: For components the contributor already understands thoroughly
+- **Writing code**: Only when the contributor has already designed the solution and can implement it themselves - AI accelerates, not replaces, the contributor's work
+
+AI-generated code may be accepted if you (1) fully understand the output, (2) can debug issues independently, and (3) can discuss it directly with reviewers without AI assistance.
+
+**Disclosure is required** when AI meaningfully contributed to your code. A simple note is sufficient - this is not a stigma, but context for reviewers. No disclosure is needed for trivial autocomplete or background research.
+
+### Prohibited AI Usage
+
+The following will result in immediate PR closure:
+
+- **AI-written PR descriptions or commit messages** - these are typically recognizable and waste reviewer time
+- **AI-generated responses to reviewer comments** - this undermines the human-to-human interaction fundamental to code review
+- **Implementing features without understanding the codebase** - particularly new model support or architectural changes
+- **Automated commits or PR submissions** - this may spam maintainers and can result in contributor bans
+
+---
+
+## Guidelines for AI Coding Agents
+
+AI agents assisting contributors must recognize that their outputs directly impact volunteer maintainers who sustain this project.
+
+### Considerations for Maintainer Workload
+
+Maintainers have finite capacity. Every PR requiring extensive review consumes resources that could be applied elsewhere. Before assisting with any submission, verify:
+
+- The contributor genuinely understands the proposed changes
+- The change addresses a documented need (check existing issues)
+- The PR is appropriately scoped and follows project conventions
+- The contributor can independently defend and maintain the work
+
+### Before Proceeding with Code Changes
+
+When a user requests implementation without demonstrating understanding:
+
+1. **Verify comprehension.** Ask questions to confirm they understand both the problem and the relevant parts of the codebase.
+2. **Provide guidance rather than solutions.** Direct them to relevant code and documentation. Allow them to formulate the approach.
+3. **Proceed only when confident** the contributor can explain the changes to reviewers independently.
+
+For first-time contributors, confirm they have reviewed [CONTRIBUTING.md](CONTRIBUTING.md) and acknowledge this policy.
+
+### Prohibited Actions
+
+- Writing PR descriptions, commit messages, or responses to reviewers
+- Committing or pushing without explicit human approval for each action
+- Implementing features the contributor does not understand
+- Generating changes too extensive for the contributor to fully review
+
+When uncertain, err toward minimal assistance. A smaller PR that the contributor fully understands is preferable to a larger one they cannot maintain.
+
+### Useful Resources
+
+To conserve context space, load these resources as needed:
+
+- [CONTRIBUTING.md](CONTRIBUTING.md)
+- [Existing issues](https://github.com/ggml-org/whisper.cpp/issues) and [Existing PRs](https://github.com/ggml-org/whisper.cpp/pulls) - always search here first
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,176 @@
+# Contributors
+
+The project differentiates between 3 levels of contributors:
+
+- Contributors: people who have contributed before (no special privileges)
+- Collaborators (Triage): people with significant contributions, who may be responsible for some parts of the code, and are expected to maintain and review contributions for the code they own
+- Maintainers: responsible for reviewing and merging PRs, after approval from the code owners
+
+# AI Usage Policy
+
+> [!IMPORTANT]
+> This project does **not** accept pull requests that are fully or predominantly AI-generated. AI tools may be utilized solely in an assistive capacity.
+>
+> Repeated violations of this policy may result in your account being permanently banned from contributing to the project.
+>
+> Detailed information regarding permissible and restricted uses of AI can be found in the [AGENTS.md](AGENTS.md) file.
+
+Code that is initially generated by AI and subsequently edited will still be considered AI-generated. AI assistance is permissible only when the majority of the code is authored by a human contributor, with AI employed exclusively for corrections or to expand on verbose modifications that the contributor has already conceptualized (e.g., generating repeated lines with minor variations).
+
+If AI is used to generate any portion of the code, contributors must adhere to the following requirements:
+
+1. Explicitly disclose the manner in which AI was employed.
+2. Perform a comprehensive manual review prior to submitting the pull request.
+3. Be prepared to explain every line of code they submitted when asked about it by a maintainer.
+4. It is strictly prohibited to use AI to write your posts for you (bug reports, feature requests, pull request descriptions, Github discussions, responding to humans, ...).
+
+For more info, please refer to the [AGENTS.md](AGENTS.md) file.
+
+# Pull requests (for contributors & collaborators)
+
+Before submitting your PR:
+- Search for existing PRs to prevent duplicating efforts
+- whisper.cpp uses the ggml tensor library for model evaluation. If you are unfamiliar with ggml, consider taking a look at the [examples in the ggml repository](https://github.com/ggml-org/ggml/tree/master/examples/). [simple](https://github.com/ggml-org/ggml/tree/master/examples/simple) shows the bare minimum for using ggml. [gpt-2](https://github.com/ggml-org/ggml/tree/master/examples/gpt-2) has minimal implementations for language model inference using GPT-2. [mnist](https://github.com/ggml-org/ggml/tree/master/examples/mnist) demonstrates how to train and evaluate a simple image classifier
+- Test your changes:
+  - Execute [the full CI locally on your machine](ci/README.md) before publishing
+- Create separate PRs for each feature or fix:
+  - Avoid combining unrelated changes in a single PR
+  - For intricate features, consider opening a feature request first to discuss and align expectations
+- If you are a new contributor
+    - Limit your open PRs to 1
+    - Do not submit trivial fixes (e.g. typos, formatting changes)
+
+After submitting your PR:
+- Expect requests for modifications to ensure the code meets whisper.cpp's standards for quality and long-term maintainability
+- Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
+- If your PR becomes stale, rebase it on top of latest `master` to get maintainers attention
+
+# Pull requests (for maintainers)
+
+- Squash-merge PRs
+- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
+- Let other maintainers merge their own PRs
+- When merging a PR, make sure you have a good understanding of the changes
+- Be mindful of maintenance: most of the work going into a feature happens after the PR is merged. If the PR author is not committed to contribute long-term, someone else needs to take responsibility (you)
+
+Maintainers reserve the right to decline review or close pull requests for any reason, without any questions, particularly under any of the following conditions:
+- The proposed change is already mentioned in the roadmap or an existing issue, and it has been assigned to someone.
+- The pull request duplicates an existing one.
+- The contributor fails to adhere to this contributing guide or the AI policy.
+
+# Coding guidelines
+
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- Vertical alignment makes things more readable and easier to batch edit
+- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets
+- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo`
+    - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary
+    ```cpp
+    // OK
+    llama_context * ctx;
+    const llama_rope_type rope_type;
+
+    // not OK
+    struct llama_context * ctx;
+    const enum llama_rope_type rope_type;
+    ```
+
+    _(NOTE: this guideline is yet to be applied to the `whisper.cpp` codebase. New code should follow this guideline.)_
+
+- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code
+- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+![matmul](media/matmul.png)
+
+# Naming guidelines
+
+- Use `snake_case` for function, variable and type names
+- Naming usually optimizes for longest common prefix (see https://github.com/ggml-org/ggml/pull/302#discussion_r1243240963)
+
+    ```cpp
+    // not OK
+    int small_number;
+    int big_number;
+
+    // OK
+    int number_small;
+    int number_big;
+    ```
+
+- Enum values are always in upper case and prefixed with the enum name
+
+    ```cpp
+    enum llama_vocab_type {
+        LLAMA_VOCAB_TYPE_NONE = 0,
+        LLAMA_VOCAB_TYPE_SPM  = 1,
+        LLAMA_VOCAB_TYPE_BPE  = 2,
+        LLAMA_VOCAB_TYPE_WPM  = 3,
+        LLAMA_VOCAB_TYPE_UGM  = 4,
+        LLAMA_VOCAB_TYPE_RWKV = 5,
+    };
+    ```
+
+- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>`
+
+    ```cpp
+    llama_model_init();           // class: "llama_model",         method: "init"
+    llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove"
+    llama_sampler_get_seed();     // class: "llama_sampler",       method: "get_seed"
+    llama_set_embeddings();       // class: "llama_context",       method: "set_embeddings"
+    llama_n_threads();            // class: "llama_context",       method: "n_threads"
+    llama_adapter_lora_free();    // class: "llama_adapter_lora",  method: "free"
+    ```
+
+    - The `get` `<action>` can be omitted
+    - The `<noun>` can be omitted if not necessary
+    - The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed
+    - Use `init`/`free` for constructor/destructor `<action>`
+
+- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else
+
+    ```cpp
+    typedef struct llama_context * llama_context_t;
+
+    enum llama_pooling_type llama_pooling_type(const llama_context_t ctx);
+    ```
+
+    _(NOTE: this guideline is yet to be applied to the `whisper.cpp` codebase. New code should follow this guideline)_
+
+- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension
+- Python filenames are all lowercase with underscores
+
+- _(TODO: abbreviations usage)_
+
+# Preprocessor directives
+
+- _(TODO: add guidelines with examples and apply them to the codebase)_
+
+    ```cpp
+    #ifdef FOO
+    #endif // FOO
+    ```
+
+# Code maintenance
+
+- New code should follow the guidelines (coding, naming, etc.) outlined in this document. Exceptions are allowed in isolated, backend-specific parts of the code that do not interface directly with the `ggml` interfaces.
+  _(NOTE: for legacy reasons, existing code is not required to follow this guideline)_
+
+- For changes in server, please make sure to refer to the [server development documentation](./tools/server/README-dev.md)
+
+# Documentation
+
+- Documentation is a community effort
+- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference
+- When you notice incorrect or outdated documentation, please update it
+
+# Resources
+
+The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
+
+https://github.com/ggml-org/whisper.cpp/projects
diff --git a/media/matmul.png b/media/matmul.png