diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4e7c5b24dc3..7c0b0336e0e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -107,6 +107,8 @@ add_library(whisper
             ../include/whisper.h
             whisper-arch.h
             whisper.cpp
+            aneforge/whisper-aneforge.h
+            aneforge/whisper-aneforge.cpp
             )
 
 add_library(parakeet
diff --git a/src/aneforge/whisper-aneforge.cpp b/src/aneforge/whisper-aneforge.cpp
new file mode 100644
index 00000000000..33acb001162
--- /dev/null
+++ b/src/aneforge/whisper-aneforge.cpp
@@ -0,0 +1,176 @@
+// ANEForge encoder backend (see whisper-aneforge.h). dlopen's the ANEForge dispatch
+// dylib (libane_e5rt_dispatch.dylib) so no extra link configuration is needed; the
+// path comes from ANEFORGE_DYLIB. Compiles the persisted encoder MIL once at init
+// (compile-with-cache; the on-device program is not cold-loadable across processes),
+// then dispatches it per encode.
+//
+// The implementation needs <dlfcn.h> and the __fp16 type, so it is only built for
+// Apple arm64. This file is compiled into libwhisper on every platform, so all other
+// targets get stubs that report the backend as unavailable.
+#include "whisper-aneforge.h"
+
+#include <cstdio>
+
+#if defined(__APPLE__) && (defined(__aarch64__) || defined(__arm64__))
+
+#include <dlfcn.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+#include <vector>
+
+// Entry points resolved from the dispatch dylib. As called below: compile takes the
+// MIL path, the cache path, a target selector (0x4 = ANE), then names, byte sizes and
+// count for the inputs and for the outputs; set_input/get_output take an fp16 count.
+typedef struct ane_e5rt_program ane_e5rt_program_t;
+typedef ane_e5rt_program_t * (*compile_fn)(const char *, const char *, uint64_t,
+    const char * const *, const size_t *, size_t, const char * const *, const size_t *, size_t);
+typedef int (*set_in_fn)(ane_e5rt_program_t *, const char *, const uint16_t *, size_t);
+typedef int (*get_out_fn)(ane_e5rt_program_t *, const char *, uint16_t *, size_t);
+typedef int (*exec_fn)(ane_e5rt_program_t *);
+typedef void (*release_fn)(ane_e5rt_program_t *);
+
+struct whisper_aneforge_context {
+    void * dl = nullptr;                      // dlopen handle of the dispatch dylib
+    ane_e5rt_program_t * prog = nullptr;      // compiled encoder program
+    set_in_fn set_input = nullptr;
+    get_out_fn get_output = nullptr;
+    exec_fn execute = nullptr;
+    release_fn release = nullptr;
+    std::string mel_port, pos_port, out_port; // port names read from ports.txt
+    size_t mel_n = 0, pos_n = 0, out_n = 0;   // element counts read from ports.txt
+    std::vector<uint16_t> mel16;              // fp16 staging buffer for the mel input
+    std::vector<uint16_t> out16;              // fp16 staging buffer for the output
+};
+
+// arm64 has a native IEEE binary16 type; these casts compile to NEON fcvt and
+// auto-vectorize, unlike branchy scalar bit-twiddling. ANEForge's fp16 ports are
+// IEEE binary16, which __fp16 matches bit-for-bit.
+static inline void f32_to_f16(const float * src, uint16_t * dst, size_t n) {
+    __fp16 * d = (__fp16 *) dst;
+    for (size_t i = 0; i < n; i++) d[i] = (__fp16) src[i];
+}
+static inline void f16_to_f32(const uint16_t * src, float * dst, size_t n) {
+    const __fp16 * s = (const __fp16 *) src;
+    for (size_t i = 0; i < n; i++) dst[i] = (float) s[i];
+}
+
+// Loads the dispatch dylib named by ANEFORGE_DYLIB, reads the port table from
+// <bundle_dir>/ports.txt, compiles <bundle_dir>/model.mil and uploads pos.f16.
+// Returns nullptr (after logging to stderr) on any failure.
+struct whisper_aneforge_context * whisper_aneforge_init(const char * bundle_dir) {
+    if (!bundle_dir) { fprintf(stderr, "aneforge: no bundle directory given\n"); return nullptr; }
+    const char * dylib = getenv("ANEFORGE_DYLIB");
+    if (!dylib) { fprintf(stderr, "aneforge: set ANEFORGE_DYLIB to libane_e5rt_dispatch.dylib\n"); return nullptr; }
+    void * dl = dlopen(dylib, RTLD_NOW | RTLD_LOCAL);
+    if (!dl) { fprintf(stderr, "aneforge: dlopen(%s) failed: %s\n", dylib, dlerror()); return nullptr; }
+
+    auto ctx = new whisper_aneforge_context();
+    ctx->dl = dl;
+    auto compile = (compile_fn) dlsym(dl, "ane_e5rt_program_compile");
+    ctx->set_input = (set_in_fn) dlsym(dl, "ane_e5rt_program_set_input_fp16");
+    ctx->get_output = (get_out_fn) dlsym(dl, "ane_e5rt_program_get_output_fp16");
+    ctx->execute = (exec_fn) dlsym(dl, "ane_e5rt_program_execute");
+    ctx->release = (release_fn) dlsym(dl, "ane_e5rt_program_release");
+    if (!compile || !ctx->set_input || !ctx->get_output || !ctx->execute || !ctx->release) {
+        fprintf(stderr, "aneforge: missing dispatch symbols\n");
+        whisper_aneforge_free(ctx);
+        return nullptr;
+    }
+
+    // ports.txt: three lines "name nelems" for mel, pos, output (in that order).
+    std::string dir = bundle_dir;
+    FILE * pf = fopen((dir + "/ports.txt").c_str(), "r");
+    if (!pf) { fprintf(stderr, "aneforge: no ports.txt in %s\n", bundle_dir); whisper_aneforge_free(ctx); return nullptr; }
+    char nm[256]; size_t ne;
+    std::string * ports[3] = {&ctx->mel_port, &ctx->pos_port, &ctx->out_port};
+    size_t * nes[3] = {&ctx->mel_n, &ctx->pos_n, &ctx->out_n};
+    for (int i = 0; i < 3; i++) {
+        if (fscanf(pf, "%255s %zu", nm, &ne) != 2 || ne == 0) {
+            fprintf(stderr, "aneforge: malformed ports.txt in %s\n", bundle_dir);
+            fclose(pf);
+            whisper_aneforge_free(ctx);
+            return nullptr;
+        }
+        *ports[i] = nm;
+        *nes[i] = ne;
+    }
+    fclose(pf);
+
+    std::string mil = dir + "/model.mil", cache = dir + "/cache";
+    const char * in_names[2] = {ctx->mel_port.c_str(), ctx->pos_port.c_str()};
+    size_t in_bytes[2] = {ctx->mel_n * 2, ctx->pos_n * 2};
+    const char * out_name = ctx->out_port.c_str();
+    size_t out_bytes = ctx->out_n * 2;
+    ctx->prog = compile(mil.c_str(), cache.c_str(), 0x4 /*ANE*/, in_names, in_bytes, 2, &out_name, &out_bytes, 1);
+    if (!ctx->prog) { fprintf(stderr, "aneforge: compile failed\n"); whisper_aneforge_free(ctx); return nullptr; }
+
+    // The positional-embedding port is a constant; set it once from pos.f16.
+    // NOTE(review): the int status of set_input is ignored here because the dylib's
+    // success/failure convention is not visible from this file - confirm and check it.
+    std::vector<uint16_t> pos(ctx->pos_n);
+    FILE * pp = fopen((dir + "/pos.f16").c_str(), "rb");
+    if (!pp || fread(pos.data(), 2, ctx->pos_n, pp) != ctx->pos_n) {
+        fprintf(stderr, "aneforge: pos.f16 read failed\n");
+        if (pp) fclose(pp);
+        whisper_aneforge_free(ctx);
+        return nullptr;
+    }
+    fclose(pp);
+    ctx->set_input(ctx->prog, ctx->pos_port.c_str(), pos.data(), ctx->pos_n);
+
+    ctx->mel16.resize(ctx->mel_n);
+    ctx->out16.resize(ctx->out_n);
+    fprintf(stderr, "aneforge: encoder ready (mel=%s pos=%s out=%s)\n",
+        ctx->mel_port.c_str(), ctx->pos_port.c_str(), ctx->out_port.c_str());
+    return ctx;
+}
+
+// Runs one encoder pass; see whisper-aneforge.h for the buffer contract. Returns false
+// without touching out when ctx/mel/out is null or n_mel * n_len does not match the
+// mel port size the program was compiled for.
+// NOTE(review): the int statuses of set_input/execute/get_output are still ignored -
+// the dylib's success/failure convention is not visible from this file.
+bool whisper_aneforge_encode(struct whisper_aneforge_context * ctx,
+        int64_t n_mel, int64_t n_len, const float * mel, float * out) {
+    if (!ctx || !mel || !out) { fprintf(stderr, "aneforge: encode called with a null argument\n"); return false; }
+    size_t n = (size_t) n_mel * (size_t) n_len;
+    if (n != ctx->mel_n) { fprintf(stderr, "aneforge: mel size %zu != %zu\n", n, ctx->mel_n); return false; }
+    f32_to_f16(mel, ctx->mel16.data(), n);
+    ctx->set_input(ctx->prog, ctx->mel_port.c_str(), ctx->mel16.data(), ctx->mel_n);
+    ctx->execute(ctx->prog);
+    ctx->get_output(ctx->prog, ctx->out_port.c_str(), ctx->out16.data(), ctx->out_n);
+    // The encoder output is [S, d_model] row-major, which is whisper.cpp's embd_enc layout.
+    f16_to_f32(ctx->out16.data(), out, ctx->out_n);
+    return true;
+}
+
+// Releases the program (if one was compiled) and closes the dylib; ctx may be null.
+void whisper_aneforge_free(struct whisper_aneforge_context * ctx) {
+    if (!ctx) return;
+    if (ctx->prog && ctx->release) ctx->release(ctx->prog);
+    if (ctx->dl) dlclose(ctx->dl);
+    delete ctx;
+}
+
+#else // not Apple arm64: the backend is unavailable
+
+struct whisper_aneforge_context * whisper_aneforge_init(const char * bundle_dir) {
+    (void) bundle_dir;
+    fprintf(stderr, "aneforge: the ANEForge encoder is only available on Apple arm64\n");
+    return nullptr;
+}
+
+bool whisper_aneforge_encode(struct whisper_aneforge_context * ctx,
+        int64_t n_mel, int64_t n_len, const float * mel, float * out) {
+    (void) ctx; (void) n_mel; (void) n_len; (void) mel; (void) out;
+    return false;
+}
+
+void whisper_aneforge_free(struct whisper_aneforge_context * ctx) {
+    (void) ctx;
+}
+
+#endif
diff --git a/src/aneforge/whisper-aneforge.h b/src/aneforge/whisper-aneforge.h
new file mode 100644
index 00000000000..7ecfe14998a
--- /dev/null
+++ b/src/aneforge/whisper-aneforge.h
@@ -0,0 +1,30 @@
+// ANEForge encoder backend for whisper.cpp: runs the audio encoder directly on the
+// Apple Neural Engine via ANEForge's e5rt dispatch shim, in place of the ggml/Metal or
+// CoreML encoder. Enabled at runtime by setting ANEFORGE_ENCODER to a bundle directory
+// (model.mil + weights.bin + ports.txt + pos.f16), produced by export_encoder.py.
+#pragma once
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct whisper_aneforge_context;
+
+// Loads the encoder bundle in bundle_dir. Returns NULL on failure, and always on
+// targets other than Apple arm64, where the backend is not built.
+struct whisper_aneforge_context * whisper_aneforge_init(const char * bundle_dir);
+
+// mel: n_mel x n_len fp32 (channel-major, as whisper stores it). out: the encoder
+// output, n_ctx x n_state fp32, written in the [n_state, n_ctx] ggml layout.
+// Returns false, leaving out untouched, if the arguments do not match the bundle.
+bool whisper_aneforge_encode(struct whisper_aneforge_context * ctx,
+        int64_t n_mel, int64_t n_len, const float * mel, float * out);
+
+// Releases the program and the dispatch dylib. ctx may be NULL.
+void whisper_aneforge_free(struct whisper_aneforge_context * ctx);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 5ffc70af00e..c5179b28035 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -10,6 +10,8 @@
 #include "coreml/whisper-encoder.h"
 #endif
 
+#include "aneforge/whisper-aneforge.h"
+
 #ifdef WHISPER_USE_OPENVINO
 #include "openvino/whisper-openvino-encoder.h"
 #endif
@@ -899,6 +901,8 @@ struct whisper_state {
     whisper_coreml_context * ctx_coreml = nullptr;
 #endif
 
+    whisper_aneforge_context * ctx_aneforge = nullptr;
+
 #ifdef WHISPER_USE_OPENVINO
     whisper_openvino_context * ctx_openvino = nullptr;
 #endif
@@ -1970,7 +1974,9 @@ static bool whisper_encode_external(const whisper_state & wstate) {
     const bool use_openvino = wstate.ctx_openvino != nullptr;
 #endif
 
-    return use_coreml || use_openvino;
+    const bool use_aneforge = wstate.ctx_aneforge != nullptr;
+
+    return use_coreml || use_openvino || use_aneforge;
 }
 
 static struct ggml_cgraph * whisper_build_graph_conv(
@@ -2409,11 +2415,18 @@ static bool whisper_encode_internal(
             }
         } else {
             ggml_backend_sched_reset(sched);
+            if (wstate.ctx_aneforge != nullptr) {
+                if (!whisper_aneforge_encode(wstate.ctx_aneforge, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data)) {
+                    WHISPER_LOG_ERROR("%s: ANEForge encoder failed\n", __func__);
+                    return false;
+                }
+            } else {
 #if defined(WHISPER_USE_COREML)
-            whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
+                whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
 #elif defined(WHISPER_USE_OPENVINO)
-            whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc);
+                whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc);
 #endif
+            }
         }
     }
 
@@ -3455,6 +3468,18 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     }
 #endif
 
+    if (const char * aneforge_dir = getenv("ANEFORGE_ENCODER")) {
+        WHISPER_LOG_INFO("%s: loading ANEForge encoder from '%s'\n", __func__, aneforge_dir);
+        WHISPER_LOG_INFO("%s: compiling for the ANE (one time) ...\n", __func__);
+        state->ctx_aneforge = whisper_aneforge_init(aneforge_dir);
+        if (!state->ctx_aneforge) {
+            WHISPER_LOG_ERROR("%s: failed to load ANEForge encoder from '%s'\n", __func__, aneforge_dir);
+            whisper_free_state(state);
+            return nullptr;
+        }
+        WHISPER_LOG_INFO("%s: ANEForge encoder loaded\n", __func__);
+    }
+
     state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
 
     state->batch = whisper_batch_init(ctx->model.hparams.n_text_ctx, WHISPER_MAX_DECODERS);
@@ -3828,6 +3853,11 @@ void whisper_free_state(struct whisper_state * state) {
         }
 #endif
 
+        if (state->ctx_aneforge != nullptr) {
+            whisper_aneforge_free(state->ctx_aneforge);
+            state->ctx_aneforge = nullptr;
+        }
+
 #ifdef WHISPER_USE_OPENVINO
         if (state->ctx_openvino != nullptr) {
             whisper_openvino_free(state->ctx_openvino);