sqliteai
diff --git a/‎.github/workflows/main.yml‎
Lines changed: 82 additions & 20 deletions b/‎.github/workflows/main.yml‎
Lines changed: 82 additions & 20 deletions
diff --git a/‎API.md‎
Lines changed: 108 additions & 2 deletions b/‎API.md‎
Lines changed: 108 additions & 2 deletions
diff --git a/‎Makefile‎
Lines changed: 22 additions & 4 deletions b/‎Makefile‎
Lines changed: 22 additions & 4 deletions
@@ -15,44 +15,91 @@ env:
   GGUF_MODEL_DIR: tests/models/unsloth/gemma-3-270m-it-GGUF
   GGUF_MODEL_NAME: gemma-3-270m-it-UD-IQ2_M.gguf
   GGUF_MODEL_URL: https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
+  WHISPER_MODEL_DIR: tests/models/ggerganov/whisper-tiny
+  WHISPER_MODEL_NAME: ggml-tiny.bin
+  WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin
+  AUDIO_TEST_DIR: tests/audio
+  AUDIO_TEST_WAV: tests/audio/jfk.wav
+  AUDIO_TEST_WAV_URL: https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav
 
 jobs:
-  download-model:
+  download-models:
     outputs:
-      cache-key: gguf-${{ steps.meta.outputs.hash }}
-      model-path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
-    name: Download GGUF model
+      gguf-cache-key: gguf-${{ steps.meta.outputs.gguf-hash }}
+      gguf-model-path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
+      whisper-cache-key: whisper-${{ steps.meta.outputs.whisper-hash }}
+      whisper-model-path: ${{ env.WHISPER_MODEL_DIR }}/${{ env.WHISPER_MODEL_NAME }}
+      audio-cache-key: audio-${{ steps.meta.outputs.audio-hash }}
+      audio-test-path: ${{ env.AUDIO_TEST_WAV }}
+    name: Download models and test assets
     runs-on: ubuntu-22.04
     steps:
-      - name: Compute model URL hash
+      - name: Compute URL hashes
         id: meta
         run: |
           if command -v sha256sum >/dev/null 2>&1; then
-            hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
+            gguf_hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
+            whisper_hash=$(echo -n "${{ env.WHISPER_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
+            audio_hash=$(echo -n "${{ env.AUDIO_TEST_WAV_URL }}" | sha256sum | cut -d' ' -f1)
           else
-            hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
+            gguf_hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
+            whisper_hash=$(echo -n "${{ env.WHISPER_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
+            audio_hash=$(echo -n "${{ env.AUDIO_TEST_WAV_URL }}" | shasum -a 256 | cut -d' ' -f1)
           fi
-          echo "hash=$hash" >> "$GITHUB_OUTPUT"
+          echo "gguf-hash=$gguf_hash" >> "$GITHUB_OUTPUT"
+          echo "whisper-hash=$whisper_hash" >> "$GITHUB_OUTPUT"
+          echo "audio-hash=$audio_hash" >> "$GITHUB_OUTPUT"
 
-      - name: Prepare model directory
-        run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"
+      - name: Prepare directories
+        run: |
+          mkdir -p "${{ env.GGUF_MODEL_DIR }}"
+          mkdir -p "${{ env.WHISPER_MODEL_DIR }}"
+          mkdir -p "${{ env.AUDIO_TEST_DIR }}"
 
       - name: Restore GGUF cache
-        id: cache
+        id: cache-gguf
         uses: actions/cache@v4
         with:
           path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
-          key: gguf-${{ steps.meta.outputs.hash }}
+          key: gguf-${{ steps.meta.outputs.gguf-hash }}
 
       - name: Download GGUF model
-        if: steps.cache.outputs.cache-hit != 'true'
-        run: |
-          curl -L --fail --retry 3 "${{ env.GGUF_MODEL_URL }}" -o "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
+        if: steps.cache-gguf.outputs.cache-hit != 'true'
+        run: curl -L --fail --retry 3 "${{ env.GGUF_MODEL_URL }}" -o "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
+
       - name: Verify GGUF model
         run: test -f "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
 
+      - name: Restore Whisper cache
+        id: cache-whisper
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.WHISPER_MODEL_DIR }}/${{ env.WHISPER_MODEL_NAME }}
+          key: whisper-${{ steps.meta.outputs.whisper-hash }}
+
+      - name: Download Whisper model
+        if: steps.cache-whisper.outputs.cache-hit != 'true'
+        run: curl -L --fail --retry 3 "${{ env.WHISPER_MODEL_URL }}" -o "${{ env.WHISPER_MODEL_DIR }}/${{ env.WHISPER_MODEL_NAME }}"
+
+      - name: Verify Whisper model
+        run: test -f "${{ env.WHISPER_MODEL_DIR }}/${{ env.WHISPER_MODEL_NAME }}"
+
+      - name: Restore audio test file cache
+        id: cache-audio
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.AUDIO_TEST_WAV }}
+          key: audio-${{ steps.meta.outputs.audio-hash }}
+
+      - name: Download audio test file
+        if: steps.cache-audio.outputs.cache-hit != 'true'
+        run: curl -L --fail --retry 3 "${{ env.AUDIO_TEST_WAV_URL }}" -o "${{ env.AUDIO_TEST_WAV }}"
+
+      - name: Verify audio test file
+        run: test -f "${{ env.AUDIO_TEST_WAV }}"
+
   build:
-    needs: download-model
+    needs: download-models
     runs-on: ${{ matrix.os }}
     container: ${{ matrix.container && matrix.container || '' }}
     name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}}
@@ -150,14 +197,29 @@ jobs:
         with:
           submodules: true
 
-      - name: Prepare GGUF model directory
-        run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"
+      - name: Prepare test asset directories
+        run: |
+          mkdir -p "${{ env.GGUF_MODEL_DIR }}"
+          mkdir -p "${{ env.WHISPER_MODEL_DIR }}"
+          mkdir -p "${{ env.AUDIO_TEST_DIR }}"
 
       - name: Restore GGUF cache
         uses: actions/cache@v4
         with:
-          path: ${{ needs.download-model.outputs.model-path }}
-          key: ${{ needs.download-model.outputs.cache-key }}
+          path: ${{ needs.download-models.outputs.gguf-model-path }}
+          key: ${{ needs.download-models.outputs.gguf-cache-key }}
+
+      - name: Restore Whisper cache
+        uses: actions/cache@v4
+        with:
+          path: ${{ needs.download-models.outputs.whisper-model-path }}
+          key: ${{ needs.download-models.outputs.whisper-cache-key }}
+
+      - name: Restore audio test file cache
+        uses: actions/cache@v4
+        with:
+          path: ${{ needs.download-models.outputs.audio-test-path }}
+          key: ${{ needs.download-models.outputs.audio-cache-key }}
 
       - name: android setup java
         if: matrix.name == 'android-aar'
 
@@ -16,7 +16,7 @@ Returns the current version of the SQLite-AI extension.
 
 ```sql
 SELECT ai_version();
--- e.g., '0.5.1'
+-- e.g., '0.9.0'
 ```
 
 ---
@@ -706,7 +706,7 @@ SELECT llm_chat_restore('b59e...');
 
 **Description:**
 Generates a context-aware reply using chat memory, returned as a single, complete response.
-For a streams model reply, use the llm_chat virtual table.
+For a streaming model reply, use the llm_chat virtual table.
 
 **Example:**
 
@@ -716,6 +716,112 @@ SELECT llm_chat_respond('What are the most visited cities in Italy?');
 
 ---
 
+## `llm_chat_system_prompt(text TEXT)`
+
+**Returns:** `TEXT` or `NULL`
+
+**Description:**
+Gets or sets the system prompt for chat sessions. When called without arguments, returns the current system prompt (or `NULL` if none is set). When called with a text argument, sets the system prompt and returns `NULL`. The system prompt is automatically prepended as a system-role message at the beginning of chat conversations.
+
+**Example:**
+
+```sql
+-- Set a system prompt
+SELECT llm_chat_system_prompt('You are a helpful assistant that speaks Italian.');
+
+-- Get the current system prompt
+SELECT llm_chat_system_prompt();
+```
+
+---
+
+## Audio Functions
+
+### `audio_model_load(path TEXT, options TEXT)`
+
+**Returns:** `NULL`
+
+**Description:**
+Loads a Whisper model from the specified file path. An optional second argument accepts a comma-separated list of key=value options. The loaded model is used for audio transcription via `audio_model_transcribe`. Only one Whisper model can be loaded at a time per connection.
+
+**Example:**
+
+```sql
+-- Load with defaults
+SELECT audio_model_load('./models/ggml-tiny.bin');
+
+-- Load with options
+SELECT audio_model_load('./models/ggml-base.bin', 'gpu_layers=0');
+```
+
+---
+
+### `audio_model_free()`
+
+**Returns:** `NULL`
+
+**Description:**
+Unloads the current Whisper model and frees associated memory.
+
+**Example:**
+
+```sql
+SELECT audio_model_free();
+```
+
+---
+
+### `audio_model_transcribe(input TEXT/BLOB, options TEXT)`
+
+**Returns:** `TEXT`
+
+**Description:**
+Transcribes audio to text using the loaded Whisper model. The input can be either:
+- **TEXT**: A file path to an audio file (WAV, MP3, or FLAC)
+- **BLOB**: Raw audio data (format auto-detected from magic bytes)
+
+An optional second parameter accepts comma-separated key=value pairs to configure transcription behavior.
+
+Supported audio formats: WAV, MP3, and FLAC. Audio is automatically converted to 16 kHz mono PCM, as required by Whisper.
+
+**Transcription options:**
+
+| Key                | Type     | Default | Meaning                                                    |
+| ------------------ | -------- | ------- | ---------------------------------------------------------- |
+| `language`         | `text`   | `en`    | Language code (e.g., `en`, `it`, `fr`, `de`).              |
+| `translate`        | `1 or 0` | `0`     | Translate to English.                                      |
+| `n_threads`        | `number` | `4`     | Number of threads for decoding.                            |
+| `offset_ms`        | `number` | `0`     | Start transcription at this offset (milliseconds).         |
+| `duration_ms`      | `number` | `0`     | Transcribe only this duration (0 = full audio).            |
+| `no_timestamps`    | `1 or 0` | `0`     | Suppress timestamps in output.                             |
+| `single_segment`   | `1 or 0` | `0`     | Force single segment output.                               |
+| `token_timestamps` | `1 or 0` | `0`     | Enable token-level timestamps.                             |
+| `initial_prompt`   | `text`   |         | Initial prompt to guide the model.                         |
+| `temperature`      | `float`  | `0.0`   | Sampling temperature.                                      |
+| `beam_size`        | `number` | `-1`    | Beam search size (-1 = use default).                       |
+| `audio_ctx`        | `number` | `0`     | Audio context size (0 = use default).                      |
+| `suppress_regex`   | `text`   |         | Regex pattern for suppressing tokens.                      |
+| `max_len`          | `number` | `0`     | Maximum segment length in characters (0 = no limit).       |
+| `print_timestamps` | `1 or 0` | `0`     | Include timestamps in transcribed text.                    |
+
+**Examples:**
+
+```sql
+-- Transcribe from a file path
+SELECT audio_model_transcribe('./audio/speech.wav');
+
+-- Transcribe from a BLOB column
+SELECT audio_model_transcribe(audio_data) FROM recordings WHERE id = 1;
+
+-- Transcribe with options
+SELECT audio_model_transcribe('./audio/speech.mp3', 'language=it,translate=1');
+
+-- Transcribe a single segment with no timestamps
+SELECT audio_model_transcribe('./audio/clip.flac', 'single_segment=1,no_timestamps=1');
+```
+
+---
+
 ## Model Metadata
 
 These functions return internal model properties:
 
@@ -44,6 +44,15 @@ GGUF_MODEL_DIR ?= tests/models/unsloth/gemma-3-270m-it-GGUF
 GGUF_MODEL_NAME ?= gemma-3-270m-it-UD-IQ2_M.gguf
 GGUF_MODEL_URL ?= https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
 GGUF_MODEL_PATH := $(GGUF_MODEL_DIR)/$(GGUF_MODEL_NAME)
+
+WHISPER_MODEL_DIR ?= tests/models/ggerganov/whisper-tiny
+WHISPER_MODEL_NAME ?= ggml-tiny.bin
+WHISPER_MODEL_URL ?= https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin
+WHISPER_MODEL_PATH := $(WHISPER_MODEL_DIR)/$(WHISPER_MODEL_NAME)
+
+AUDIO_TEST_DIR ?= tests/audio
+AUDIO_TEST_WAV ?= $(AUDIO_TEST_DIR)/jfk.wav
+AUDIO_TEST_WAV_URL ?= https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav
 SKIP_UNITTEST ?= 0
 
 # Compiler and flags
@@ -88,7 +97,8 @@ ifeq ($(PLATFORM),windows)
 	STRIP = strip --strip-unneeded $@
 else ifeq ($(PLATFORM),macos)
 	TARGET := $(DIST_DIR)/ai.dylib
-	LLAMA_LIBS += $(BUILD_GGML)/lib/libggml-metal.a
+	LLAMA_LIBS += $(BUILD_GGML)/lib/libggml-metal.a $(BUILD_GGML)/lib/libggml-blas.a
+	LLAMA_LDFLAGS += $(L)ggml-blas$(A)
 	ifndef ARCH
 		LDFLAGS += -arch x86_64 -arch arm64
 		CFLAGS += -arch x86_64 -arch arm64
@@ -102,7 +112,7 @@ else ifeq ($(PLATFORM),macos)
 		WHISPER_OPTIONS += -DGGML_OPENMP=OFF -DCMAKE_OSX_ARCHITECTURES="$(ARCH)" -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 		MINIAUDIO_OPTIONS += -DCMAKE_OSX_ARCHITECTURES="$(ARCH)" -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 	endif
-	LDFLAGS += -L./$(BUILD_GGML)/lib -lggml-metal -L./$(BUILD_GGML)/lib -framework Metal -framework Foundation -framework CoreFoundation -framework QuartzCore -dynamiclib -undefined dynamic_lookup -headerpad_max_install_names
+	LDFLAGS += -L./$(BUILD_GGML)/lib -lggml-metal -L./$(BUILD_GGML)/lib -framework Accelerate -framework Metal -framework Foundation -framework CoreFoundation -framework QuartzCore -dynamiclib -undefined dynamic_lookup -headerpad_max_install_names
 	STRIP = strip -x -S $@
 else ifeq ($(PLATFORM),android)
 	ifndef ARCH # Set ARCH to find Android NDK's Clang compiler, the user should set the ARCH
@@ -235,16 +245,24 @@ $(GGUF_MODEL_PATH):
 	@mkdir -p $(GGUF_MODEL_DIR)
 	curl -L --fail --retry 3 -o $@ $(GGUF_MODEL_URL)
 
+$(WHISPER_MODEL_PATH):
+	@mkdir -p $(WHISPER_MODEL_DIR)
+	curl -L --fail --retry 3 -o $@ $(WHISPER_MODEL_URL)
+
+$(AUDIO_TEST_WAV):
+	@mkdir -p $(AUDIO_TEST_DIR)
+	curl -L --fail --retry 3 -o $@ $(AUDIO_TEST_WAV_URL)
+
 TEST_DEPS := $(TARGET)
 ifeq ($(SKIP_UNITTEST),0)
-TEST_DEPS += $(CTEST_BIN) $(GGUF_MODEL_PATH)
+TEST_DEPS += $(CTEST_BIN) $(GGUF_MODEL_PATH) $(WHISPER_MODEL_PATH) $(AUDIO_TEST_WAV)
 endif
 
 test: $(TEST_DEPS)
 		@echo "Running sqlite3 CLI smoke test (ensures .load works)..."
 		$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
 ifeq ($(SKIP_UNITTEST),0)
-		$(CTEST_BIN) --extension "$(TARGET)" --model "$(GGUF_MODEL_PATH)"
+		$(CTEST_BIN) --extension "$(TARGET)" --model "$(GGUF_MODEL_PATH)" --whisper-model "$(WHISPER_MODEL_PATH)" --audio "$(AUDIO_TEST_WAV)"
 else
 		@echo "Skipping C unit tests (SKIP_UNITTEST=$(SKIP_UNITTEST))."
 endif