|
| 1 | +//! AdaWorld backend: implements burn's Backend trait. |
| 2 | +//! |
| 3 | +//! Delegates all tensor operations to ndarray + crate::simd. |
| 4 | +//! This is the entry point — every burn model compiled with `Backend = AdaWorld` |
| 5 | +//! runs on our SIMD dispatch with optional AttentionTable compiled attention. |
| 6 | +//! |
| 7 | +//! # Implementation Status |
| 8 | +//! |
| 9 | +//! The Backend trait requires 200+ methods across 7 op traits. |
| 10 | +//! Implementation strategy: core ops first (what Whisper/Llama need), |
| 11 | +//! then expand coverage guided by burn-backend-tests. |
| 12 | +//! |
| 13 | +//! Required traits: |
| 14 | +//! FloatTensorOps — 84 required methods (+ ~36 with defaults) |
| 15 | +//! IntTensorOps — ~50 required methods |
| 16 | +//! BoolTensorOps — ~30 required methods |
| 17 | +//! ModuleOps — conv, pool, embedding, etc. |
| 18 | +//! ActivationOps — relu, sigmoid, gelu (most have defaults) |
| 19 | +//! QTensorOps — quantized tensor ops |
| 20 | +//! TransactionOps — batch execution |
| 21 | +//! |
| 22 | +//! # Architecture |
| 23 | +//! |
| 24 | +//! ```text |
| 25 | +//! burn::Tensor<AdaWorld, D> |
| 26 | +//! ↓ (burn dispatches via Backend trait) |
| 27 | +//! AdaWorld::float_matmul(lhs, rhs) |
| 28 | +//! ↓ (check for compiled attention table) |
| 29 | +//! ├── AttentionTable[q_idx][k_idx] → O(1) (if compiled) |
| 30 | +//! └── ndarray general_mat_mul() → O(d) (fallback to BLAS) |
| 31 | +//! ↓ (ndarray delegates to BLAS or matrixmultiply) |
| 32 | +//! crate::simd::F32x16 → AVX-512 / AVX2 via LazyLock dispatch |
| 33 | +//! ``` |
| 34 | +
|
| 35 | +use crate::tensor::AdaTensor; |
| 36 | + |
/// The AdaWorld backend marker type.
///
/// A unit struct with no fields: it carries no state and exists only to be
/// named as the backend type (`Backend = AdaWorld`).
///
/// CPU-only. Uses adaworldapi/ndarray with `crate::simd` dispatch.
/// Feature `attention-table` enables the bgz-tensor compiled attention path.
///
/// `Copy`, `PartialEq`, `Eq` and `Hash` are derived in addition to `Clone`,
/// `Default` and `Debug` so this marker offers the same trait surface as the
/// `CpuDevice` handle and can be passed by value or used as a map key.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Hash)]
pub struct AdaWorld;
| 43 | + |
/// CPU device handle for the AdaWorld backend.
///
/// A unit struct: there is only one CPU device, so the handle has nothing to
/// store and every value compares equal to every other.
///
/// `Copy` is derived so the handle can be passed by value without an explicit
/// `.clone()`; the remaining derives (`Clone`, `Default`, `Debug`,
/// `PartialEq`, `Eq`, `Hash`) are unchanged.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Hash)]
pub struct CpuDevice;
| 47 | + |
| 48 | +// NOTE: Full Backend trait implementation requires 200+ methods across 7 traits. |
| 49 | +// This is tracked as a multi-session effort: |
| 50 | +// |
| 51 | +// Session 1 (current): Crate skeleton + architecture + tensor primitive |
| 52 | +// Session 2: FloatTensorOps core (from_data, matmul, add, mul, exp, reshape, transpose) |
| 53 | +// Session 3: IntTensorOps + BoolTensorOps |
| 54 | +// Session 4: ModuleOps (conv, embedding) + ActivationOps |
| 55 | +// Session 5: QTensorOps + TransactionOps + burn-backend-tests |
| 56 | +// |
| 57 | +// The implementation follows burn-ndarray's pattern but uses: |
| 58 | +// - crate::simd::F32x16 for element-wise ops (not macerator) |
| 59 | +// - LazyLock<SimdDispatch> for runtime tier selection (not compile-time features) |
| 60 | +// - Optional AttentionTable for compiled attention (unique to this backend) |
0 commit comments