thomasisensee
diff --git a/README.md b/README.md
Lines changed: 14 additions & 0 deletions
diff --git a/benchmarks/README.md b/benchmarks/README.md
Lines changed: 11 additions & 5 deletions
diff --git a/benchmarks/lookup_benchmarks.cpp b/benchmarks/lookup_benchmarks.cpp
Lines changed: 48 additions & 10 deletions
diff --git a/codecov.yml b/codecov.yml
Lines changed: 2 additions & 2 deletions
diff --git a/doc/cppapi.rst b/doc/cppapi.rst
Lines changed: 5 additions & 1 deletion
diff --git a/include/ndtbl/field_group.hpp b/include/ndtbl/field_group.hpp
Lines changed: 69 additions & 28 deletions
@@ -108,6 +108,20 @@ available before `cmake --build build`.
 When `ndtbl_ENABLE_MMAP=ON`, the C++ `read_group()` path uses read-only memory
 mapping on supported POSIX platforms.
 
+## 📐 Interpolation
+
+The standard C++ lookup path uses multilinear interpolation through explicit
+`evaluate_all_linear()` and `Grid::prepare_linear()` calls. This path uses
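+`2^Dim` table points per query and keeps the hot path allocation-free when
+results are written through `evaluate_all_linear_into()` rather than returned
+as a new `std::vector`.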
+
+The C++ API also exposes experimental local tensor-product cubic interpolation
+through explicit `evaluate_all_cubic()` and `Grid::prepare_cubic()` calls. Cubic
+interpolation uses `4^Dim` table points, can be much more expensive in high
+dimensions, and may overshoot smooth-looking table data enough to produce
+unwanted values. Bounds handling is independent of interpolation order:
+queries outside the table domain can either clamp or throw according to the
+selected `bounds_policy`.
+
 ## 🐍 Python Package
 
 The repository also ships a separate Python package in `python/ndtbl/`.
 
@@ -28,7 +28,7 @@ the next precomputed query modulo the ring size.
 
 ## Measured Operations
 
-`bench_prepare` measures only `Grid::prepare(query)`. This isolates axis
+`bench_prepare` measures only `Grid::prepare_linear(query)`. This isolates axis
 bracketing and interpolation-stencil construction. It is registered once per
 dimension and axis layout because field count does not affect stencil
 preparation.
@@ -39,15 +39,21 @@ stencil and isolates interpolation over all fields. It is registered with `2`,
 `4`, and `8` fields.
 
 `bench_typed_combined` measures
-`FieldGroup::evaluate_all_into(query, results)`. This is the typed end-to-end
-path from query coordinates to interpolated field values. It is registered with
-`2`, `4`, and `8` fields.
+`FieldGroup::evaluate_all_linear_into(query, results)`. This is the typed
+end-to-end path from query coordinates to interpolated field values. It is
+registered with `2`, `4`, and `8` fields.
 
 `bench_runtime_combined` measures
-`RuntimeFieldGroup::evaluate_all_into(query, results)`. This covers the
+`RuntimeFieldGroup::evaluate_all_linear_into(query, results)`. This covers the
 runtime-erased path, including its wrapper dispatch and scratch-buffer copy. It
 is registered with `2`, `4`, and `8` fields.
 
+`bench_typed_cubic_combined` measures one focused cubic case:
+`FieldGroup::evaluate_all_cubic_into(query, results)` for a 4D uniform table
+with `4` fields. Cubic interpolation uses `4^Dim` table points, so a 4D case
+already exercises the important 256-point stencil cost. The suite intentionally
+does not expand cubic across every dimension, axis layout, or field count.
+
 ## Build And Run
 
 Configure and build the benchmark target:
 
@@ -53,7 +53,7 @@ struct LookupContext
   ndtbl::FieldGroup<double, Dim> group;
   ndtbl::RuntimeFieldGroup<Dim> runtime_group;
   std::vector<std::array<double, Dim>> queries;
-  ndtbl::PreparedQuery<Dim> prepared;
+  ndtbl::LinearStencil<Dim> prepared;
 
   LookupContext(const ndtbl::Grid<Dim>& grid_in,
                 const ndtbl::FieldGroup<double, Dim>& group_in,
@@ -62,7 +62,7 @@ struct LookupContext
     , group(group_in)
     , runtime_group(group)
     , queries(queries_in)
-    , prepared(grid.prepare(queries.front()))
+    , prepared(grid.prepare_linear(queries.front()))
   {
   }
 };
@@ -287,7 +287,7 @@ context(std::size_t extent, ndtbl::axis_kind axis_kind, std::size_t field_count)
 /**
  * @brief Benchmark axis bracketing and interpolation-stencil preparation.
  *
- * Measures `Grid::prepare(query)` only.
+ * Measures `Grid::prepare_linear(query)` only.
  *
  * @tparam Dim Benchmark dimensionality.
  * @param state Google Benchmark state.
@@ -305,7 +305,8 @@ bench_prepare(benchmark::State& state,
   std::size_t query = 0;
 
   for (auto _ : state) {
-    ndtbl::PreparedQuery<Dim> prepared = data.grid.prepare(data.queries[query]);
+    ndtbl::LinearStencil<Dim> prepared =
+      data.grid.prepare_linear(data.queries[query]);
     benchmark::DoNotOptimize(prepared);
     query = (query + 1) % data.queries.size();
   }
@@ -342,8 +343,8 @@ bench_prepared_evaluate(benchmark::State& state,
 /**
  * @brief Benchmark typed end-to-end lookup from query coordinates.
  *
- * Measures `FieldGroup::evaluate_all_into(query, results)`, including stencil
- * preparation and interpolation.
+ * Measures `FieldGroup::evaluate_all_linear_into(query, results)`, including
+ * stencil preparation and interpolation.
  *
  * @tparam Dim Benchmark dimensionality.
  * @param state Google Benchmark state.
@@ -363,7 +364,38 @@ bench_typed_combined(benchmark::State& state,
   std::size_t query = 0;
 
   for (auto _ : state) {
-    data.group.evaluate_all_into(data.queries[query], results.data());
+    data.group.evaluate_all_linear_into(data.queries[query], results.data());
+    benchmark::DoNotOptimize(results.data());
+    benchmark::ClobberMemory();
+    query = (query + 1) % data.queries.size();
+  }
+}
+
+/**
+ * @brief Benchmark typed end-to-end cubic lookup from query coordinates.
+ *
+ * Measures `FieldGroup::evaluate_all_cubic_into(query, results)`, including
+ * cubic stencil preparation and interpolation.
+ *
+ * @tparam Dim Benchmark dimensionality.
+ * @param state Google Benchmark state.
+ * @param extent Number of support points per axis.
+ * @param axis_kind Axis representation to benchmark.
+ * @param field_count Number of fields to evaluate at each lookup.
+ */
+template<std::size_t Dim>
+void
+bench_typed_cubic_combined(benchmark::State& state,
+                           std::size_t extent,
+                           ndtbl::axis_kind axis_kind,
+                           std::size_t field_count)
+{
+  const LookupContext<Dim>& data = context<Dim>(extent, axis_kind, field_count);
+  std::vector<double> results(data.group.field_count(), 0.0);
+  std::size_t query = 0;
+
+  for (auto _ : state) {
+    data.group.evaluate_all_cubic_into(data.queries[query], results.data());
     benchmark::DoNotOptimize(results.data());
     benchmark::ClobberMemory();
     query = (query + 1) % data.queries.size();
@@ -373,8 +405,8 @@ bench_typed_combined(benchmark::State& state,
 /**
  * @brief Benchmark runtime-erased end-to-end lookup from query coordinates.
  *
- * Measures `RuntimeFieldGroup::evaluate_all_into(query, results)`, including
- * wrapper dispatch and scratch-buffer handling.
+ * Measures `RuntimeFieldGroup::evaluate_all_linear_into(query, results)`,
+ * including wrapper dispatch and scratch-buffer handling.
  *
  * @tparam Dim Benchmark dimensionality.
  * @param state Google Benchmark state.
@@ -394,7 +426,8 @@ bench_runtime_combined(benchmark::State& state,
   std::size_t query = 0;
 
   for (auto _ : state) {
-    data.runtime_group.evaluate_all_into(data.queries[query], results.data());
+    data.runtime_group.evaluate_all_linear_into(data.queries[query],
+                                                results.data());
     benchmark::DoNotOptimize(results.data());
     benchmark::ClobberMemory();
     query = (query + 1) % data.queries.size();
@@ -440,6 +473,11 @@ NDTBL_REGISTER_LOOKUP_BENCHMARKS(4,
                                  default_extent<4>(),
                                  ndtbl::axis_kind::uniform,
                                  d4_uniform);
+BENCHMARK_CAPTURE(bench_typed_cubic_combined<4>,
+                  d4_uniform_fields_4_cubic_combined,
+                  default_extent<4>(),
+                  ndtbl::axis_kind::uniform,
+                  4);
 NDTBL_REGISTER_LOOKUP_BENCHMARKS(4,
                                  default_extent<4>(),
                                  ndtbl::axis_kind::explicit_coordinates,
 
@@ -8,11 +8,11 @@ coverage:
   status:
     project:
       cpp:
-        target: auto
+        target: 90
         flags:
           - cpp
       python:
-        target: auto
+        target: 90
         flags:
           - python
 
 
@@ -12,9 +12,13 @@ API reference
 .. doxygenclass:: ndtbl::Grid
    :members:
 
-.. doxygenclass:: ndtbl::PreparedQuery
+.. doxygenclass:: ndtbl::TensorStencil
    :members:
 
+.. doxygentypedef:: ndtbl::LinearStencil
+
+.. doxygentypedef:: ndtbl::CubicStencil
+
 .. doxygenclass:: ndtbl::FieldGroup
    :members:
 
 
@@ -18,10 +18,9 @@ namespace ndtbl {
  * memory.
  *
  * The storage layout is point-major in row-major grid order:
- * `point0.field0, point0.field1, ..., point1.field0, ...`
- * where the last grid axis varies fastest before stepping to the next field
- * tuple.
- * so that one prepared interpolation query can accumulate all fields together.
+ * `point0.field0, point0.field1, ..., point1.field0, ...` where the last grid
+ * axis varies fastest before stepping to the next field tuple. One prepared
+ * interpolation stencil can accumulate all fields together.
  */
 template<class Value, std::size_t Dim>
 class FieldGroup
@@ -137,34 +136,37 @@ class FieldGroup
    * @brief Evaluate all fields using a previously prepared interpolation
    * stencil.
    *
-   * @param prepared Prepared query to reuse across fields.
+   * @tparam Stencil Fixed-size interpolation stencil type.
+   * @param stencil Prepared stencil to reuse across fields.
    * @return Interpolated field values in storage order.
-   * @see Grid::prepare
-   * @see evaluate_all(const std::array<double, Dim>&)
+   * @see Grid::prepare_linear
+   * @see Grid::prepare_cubic
+   * @see evaluate_all_linear(const std::array<double, Dim>&)
    */
-  std::vector<Value> evaluate_all(const PreparedQuery<Dim>& prepared) const
+  template<class Stencil>
+  std::vector<Value> evaluate_all(const Stencil& stencil) const
   {
     std::vector<Value> results(field_count(), Value(0));
-    evaluate_all_into(prepared, results.data());
+    evaluate_all_into(stencil, results.data());
     return results;
   }
 
   /**
    * @brief Evaluate all fields using a previously prepared interpolation
    * stencil into caller-provided storage.
    *
-   * @param prepared Prepared query to reuse across fields.
+   * @tparam Stencil Fixed-size interpolation stencil type.
+   * @param stencil Prepared stencil to reuse across fields.
    * @param results Output buffer with space for `field_count()` values.
-   * @see evaluate_all(const PreparedQuery<Dim>&)
+   * @see evaluate_all(const Stencil&)
    */
-  void evaluate_all_into(const PreparedQuery<Dim>& prepared,
-                         Value* results) const
+  template<class Stencil>
+  void evaluate_all_into(const Stencil& stencil, Value* results) const
   {
     std::fill(results, results + field_count(), Value(0));
-    for (std::size_t corner = 0; corner < PreparedQuery<Dim>::corners;
-         ++corner) {
-      const double weight = prepared.weight(corner);
-      const std::size_t base = prepared.point_index(corner) * field_count();
+    for (std::size_t point = 0; point < Stencil::points; ++point) {
+      const double weight = stencil.weight(point);
+      const std::size_t base = stencil.point_index(point) * field_count();
       for (std::size_t field = 0; field < field_count(); ++field) {
         results[field] +=
           static_cast<Value>(weight * interleaved_values_[base + field]);
@@ -173,34 +175,73 @@ class FieldGroup
   }
 
   /**
-   * @brief Evaluate all fields directly from query coordinates.
+   * @brief Evaluate all fields directly from query coordinates using
+   * multilinear interpolation.
    *
    * @param coordinates Query coordinates in grid axis order.
    * @param policy Bounds handling behavior for out-of-domain coordinates.
    * @return Interpolated field values in storage order.
-   * @see evaluate_all(const PreparedQuery<Dim>&)
+   * @see evaluate_all(const Stencil&)
    */
-  std::vector<Value> evaluate_all(
+  std::vector<Value> evaluate_all_linear(
     const std::array<double, Dim>& coordinates,
     bounds_policy policy = bounds_policy::clamp) const
   {
-    return evaluate_all(grid_.prepare(coordinates, policy));
+    return evaluate_all(grid_.prepare_linear(coordinates, policy));
   }
 
   /**
-   * @brief Evaluate all fields directly from query coordinates into
-   * caller-provided storage.
+   * @brief Evaluate all fields directly from query coordinates using
+   * multilinear interpolation into caller-provided storage.
    *
    * @param coordinates Query coordinates in grid axis order.
    * @param results Output buffer with space for `field_count()` values.
    * @param policy Bounds handling behavior for out-of-domain coordinates.
-   * @see evaluate_all_into(const PreparedQuery<Dim>&, Value*)
+   * @see evaluate_all_into(const Stencil&, Value*)
    */
-  void evaluate_all_into(const std::array<double, Dim>& coordinates,
-                         Value* results,
-                         bounds_policy policy = bounds_policy::clamp) const
+  void evaluate_all_linear_into(
+    const std::array<double, Dim>& coordinates,
+    Value* results,
+    bounds_policy policy = bounds_policy::clamp) const
+  {
+    evaluate_all_into(grid_.prepare_linear(coordinates, policy), results);
+  }
+
+  /**
+   * @brief Evaluate all fields directly from query coordinates using local
+   * cubic interpolation.
+   *
+   * Cubic interpolation uses four support points per axis and is therefore
+   * intended for experiments where the additional cost and possible overshoot
+   * are acceptable.
+   *
+   * This overload prepares a cubic stencil with `Grid::prepare_cubic()` and
+   * forwards it to the stencil-based `evaluate_all()` overload, so every call
+   * returns a newly constructed `std::vector` with `field_count()` entries.
+   * Use `evaluate_all_cubic_into()` to write into caller-provided storage
+   * instead.
+   *
+   * @param coordinates Query coordinates in grid axis order.
+   * @param policy Bounds handling behavior for out-of-domain coordinates.
+   * Defaults to `bounds_policy::clamp`.
+   * @return Cubically interpolated field values in storage order.
+   * @see Grid::prepare_cubic
+   * @see evaluate_all_cubic_into
+   */
+  std::vector<Value> evaluate_all_cubic(
+    const std::array<double, Dim>& coordinates,
+    bounds_policy policy = bounds_policy::clamp) const
+  {
+    return evaluate_all(grid_.prepare_cubic(coordinates, policy));
+  }
+
+  /**
+   * @brief Evaluate all fields directly from query coordinates using local
+   * cubic interpolation into caller-provided storage.
+   *
+   * Prepares a cubic stencil from `coordinates` and forwards it to the
+   * stencil-based `evaluate_all_into()` overload. That overload zero-fills the
+   * first `field_count()` entries of `results` before accumulating, so any
+   * previous buffer contents are discarded.
+   *
+   * @param coordinates Query coordinates in grid axis order.
+   * @param results Output buffer with space for `field_count()` values.
+   * @param policy Bounds handling behavior for out-of-domain coordinates.
+   * Defaults to `bounds_policy::clamp`.
+   * @see evaluate_all_cubic
+   * @see Grid::prepare_cubic
+   */
+  void evaluate_all_cubic_into(
+    const std::array<double, Dim>& coordinates,
+    Value* results,
+    bounds_policy policy = bounds_policy::clamp) const
   {
-    evaluate_all_into(grid_.prepare(coordinates, policy), results);
+    evaluate_all_into(grid_.prepare_cubic(coordinates, policy), results);
   }
 
 private: