From 57093be710d33af62e869cfc0637ea1392a6889a Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Sat, 18 Apr 2026 19:34:06 +0200 Subject: [PATCH] Add gRPC datasource performance experiments --- IMPROVEMENTS.md | 1879 +++++++++++++++++ ...04-17-grpc-datasource-ultra-performance.md | 402 ++++ .../plans/2026-04-18-grpc-datasource-v2.md | 158 ++ .../2026-04-18-grpc-datasource-v2-design.md | 260 +++ go.work.sum | 32 +- v2/go.mod | 4 +- v2/go.sum | 5 + .../grpc_datasource/grpc_datasource_v2.go | 423 ++++ .../grpc_datasource_v2_bench_test.go | 404 ++++ .../grpc_datasource_v2_compile.go | 462 ++++ .../grpc_datasource_v2_frame.go | 386 ++++ .../grpc_datasource_v2_hyperpb.go | 37 + .../grpc_datasource/grpc_datasource_v2_ir.go | 93 + .../grpc_datasource_v2_runtime.go | 502 +++++ .../grpc_datasource_v2_schema.go | 138 ++ .../grpc_datasource_v2_test.go | 603 ++++++ .../grpc_datasource_v2_wire.go | 171 ++ v2/pkg/engine/resolve/datasource.go | 31 + v2/pkg/engine/resolve/loader.go | 97 +- v2/pkg/engine/resolve/resolve.go | 2 + v2/pkg/engine/resolve/resolve_test.go | 137 ++ 21 files changed, 6208 insertions(+), 18 deletions(-) create mode 100644 IMPROVEMENTS.md create mode 100644 docs/superpowers/plans/2026-04-17-grpc-datasource-ultra-performance.md create mode 100644 docs/superpowers/plans/2026-04-18-grpc-datasource-v2.md create mode 100644 docs/superpowers/specs/2026-04-18-grpc-datasource-v2-design.md create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_hyperpb.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go create mode 100644 
v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go create mode 100644 v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_wire.go diff --git a/IMPROVEMENTS.md b/IMPROVEMENTS.md new file mode 100644 index 0000000000..25c890865c --- /dev/null +++ b/IMPROVEMENTS.md @@ -0,0 +1,1879 @@ +# gRPC Datasource Improvement Ledger + +Date started: 2026-04-17 +Worktree: `/Users/jens/.superset/worktrees/graphql-go-tools/hollow-playroom` +Scope: `v2/pkg/engine/datasource/grpc_datasource` + +This file records the full optimization campaign for the gRPC datasource. Each stage is treated as an experiment. If a stage does not produce the expected architectural or benchmark effect, it should be reverted or reworked before moving on. + +## Rules + +- Do not commit during this exploration. +- Keep the benchmark set stable so improvements are comparable. +- Record both wins and failures. +- Prefer deleting hot-path work over micro-optimizing it. +- If a stage does not materially support the radical architecture, revisit scope. 
+ +## Benchmark Suite + +Primary commands: + +```sh +cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments|BenchmarkBuildDependencyGraph|BenchmarkCompareKeyFields)$' -benchmem ./pkg/engine/datasource/grpc_datasource +``` + +Profiling commands: + +```sh +cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load$' -benchmem -cpuprofile /tmp/grpc-ds-load.cpu.out -memprofile /tmp/grpc-ds-load.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource +cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-load-args.cpu.out -memprofile /tmp/grpc-ds-load-args.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource +``` + +## Checklist + +- [x] Stage 0: Capture fresh benchmark baseline and hotspot profile snapshots. +- [x] Stage 1: Introduce a kernel boundary and compile fixed execution stages at datasource construction time. +- [x] Stage 2: Remove per-request dependency graph creation and topological sorting from `Load`. +- [x] Stage 3: First request-construction pass: pre-resolve call metadata and eliminate copy-returning hot lookups. +- [x] Stage 4: First context-extraction pass: replace map-based resolver batches with row-based batches. +- [ ] Stage 5: Introduce a pluggable protobuf runtime boundary. +- [x] Stage 6: Add a generated fast path for known schemas. +- [x] Stage 7: Add a compiled dynamic fast path for runtime schemas. +- [ ] Stage 8: Replace intermediate response subtree building with a direct response writer. +- [x] Stage 9: Replace request-byte-keyed pooling with kernel-owned sharded memory. +- [x] Stage 10: Reprofile, compare end-state vs baseline, and summarize findings. +- [x] Stage 11: Compile resolver-context extraction into field-number programs (attempted and reverted). 
+- [x] Stage 12: Compile a shared-context fast path for batched resolver requests (attempted and reverted). +- [x] Stage 13: Add a generated-message direct builder for resolver context requests. +- [x] Stage 14: Add a generated-message response writer fast path for supported schemas. +- [x] Stage 15: Apply generated resolve outputs directly onto the root response (attempted and reverted). +- [x] Stage 16: Materialize generated resolve value slices concurrently and attach them sequentially. + +## Baseline + +Status: captured on 2026-04-17 + +Benchmarks: + +```text +BenchmarkCompareKeyFields/simple-16 294.4 ns/op 80 B/op 4 allocs/op +BenchmarkCompareKeyFields/complex-16 757.9 ns/op 304 B/op 9 allocs/op +BenchmarkCompareKeyFields/long-16 2058 ns/op 1728 B/op 18 allocs/op +BenchmarkCompareKeyFields/long_and_nested-16 3067 ns/op 3072 B/op 21 allocs/op +BenchmarkBuildDependencyGraph-16 343.1 ns/op 432 B/op 7 allocs/op +Benchmark_DataSource_Load-16 2319 ns/op 1852 B/op 30 allocs/op +Benchmark_DataSource_Load_WithFieldArguments-16 154109 ns/op 84956 B/op 1488 allocs/op +``` + +Profiles: + +```text +Benchmark_DataSource_Load: +- alloc_space hotspots include dynamicpb.NewMessage, NewDependencyGraph, CompileFetches, + Message.GetField, TopologicalSortResolve, and JSON escaping. + +Benchmark_DataSource_Load_WithFieldArguments: +- alloc_space hotspots include arena allocation, dynamicpb.Message.Set, dynamicpb.NewMessage, + dynamicpb.Message.Mutable, and RPCCompiler.resolveContextData. +- CPU remains dominated by protobuf/gRPC and runtime profiling overhead, with package-side work + still visible in request compilation and context extraction. 
+``` + +## Stage Log Template + +Copy this block for each stage: + +```md +## Stage N: Title + +Goal: + +Hypothesis: + +Files touched: + +Commands run: + +Baseline before stage: + +Result after stage: + +What worked: + +What did not work: + +Decision: +- keep +``` + +## Stage 30: Direct Frame-To-Resolver Merge Path + +Goal: +Exploit the new native merge seam instead of routing it back through generic `astjson` subtree materialization and `MergeValuesWithPath` for every native V2 merge. + +Hypothesis: +If `v2NativeMergeResult.MergeInto` can: +- navigate indexed select paths directly inside the frame +- merge object nodes straight onto resolver targets +- only materialize leaf subtrees when necessary + +then the seam from Stage 29 stops being just an architectural placeholder and becomes the base for a real fast merge path. + +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- added red test for indexed native select path on frame-backed merge results: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2NativeMergeResult_MergeInto_SupportsIndexedSelectPath' -count=1` +- green verification for the indexed-select test plus existing resolver parity test: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'Test(V2NativeMergeResult_MergeInto_SupportsIndexedSelectPath|DataSourceV2_LoadResult_ResolveMatchesLoadAndLoadValue)$' -count=1` +- full package verification: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource ./pkg/engine/resolve` +- repeated V2 native-value vs merge-result benchmarks: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^$' -bench '^(Benchmark_DataSource_V2_Load(Value|Result)(_WithFieldArguments|_FederationRequiresUnion)?)$' -benchmem -count=3` + +Baseline before stage: +- Stage 29 introduced the native merge seam, but
`v2NativeMergeResult.MergeInto` still did the expensive thing: + - select a frame node + - materialize it into a full `astjson.Value` + - call `astjson.MergeValuesWithPath` +- It also could not follow indexed select paths such as `["data","_entities","0"]`. + +Result after stage: +- `selectDataNode` now supports array index segments. +- `MergeInto` now uses a direct object-merge path for object-to-object merges: + - root object merges + - batch merges + - merge-path leaf object merges +- Only non-object or fallback shapes still go through full `nodeValue + MergeValuesWithPath`. +- New correctness coverage is in place for indexed select paths. + +Repeated benchmark signal: +- simple path: + - `LoadValue`: `28359-29102 ns/op`, `11434-11441 B/op`, `226 allocs/op` + - `LoadResult`: `25072-25622 ns/op`, `11506-11511 B/op`, `229 allocs/op` +- field-args path: + - `LoadValue`: `72345-78721 ns/op`, `49009-49024 B/op`, `964 allocs/op` + - `LoadResult`: `72352-77667 ns/op`, `49109-49183 B/op`, `967 allocs/op` +- federation requires+union path: + - `LoadValue`: `67719-69461 ns/op`, `41451-41460 B/op`, `787 allocs/op` + - `LoadResult`: `66270-71907 ns/op`, `41564-41626 B/op`, `791 allocs/op` + +What worked: +- The merge runtime now has a real specialization point instead of always falling back to generic merge code. +- Indexed select paths are now correct for frame-backed results. +- The direct merge path is recursive and structural: + - object fields are merged directly into existing resolver objects + - nested merge-path containers are created in-place + - full subtree materialization is avoided for the object/object fast path +- The simple benchmark now shows `LoadResult` faster than `LoadValue` on CPU in this run, which is the first time the seam is not obviously underwater on the fast path. 
+ +What did not work: +- The benchmark suite used here still does not perfectly isolate the new item-merge fast path, because these datasource benchmarks primarily exercise whole-operation native results rather than loader-style target-item merges. +- Allocation count is still slightly higher on the `LoadResult` path. +- The heavy and federation-native benchmarks are mixed rather than decisive; they are roughly in the same band as `LoadValue`, not a clean breakthrough yet. + +Useful conclusions: +- This stage fixes a real correctness gap and turns Stage 29’s seam into something the optimizer can actually build on. +- The next benchmark work should move up one level and measure real resolver/loader merges with V2 native results, not only datasource-local root selection. +- The next radical optimization opportunity is now clear: + - either push this direct merge specialization further into loader-side batch fan-out + - or delete more of `astjson` entirely by letting native V2 frames participate in final response writing. + +Decision: +- keep + +## Stage 29: Native Loader Merge Boundary For V2 Frames + +Goal: +Delete the remaining loader-side `astjson` subtree materialization boundary for native V2 success paths by letting `grpc_datasource_v2` hand the loader a frame-backed merge result directly. 
+ +Hypothesis: +If the loader can consume a native merge result instead of forcing V2 through `LoadValue`, then we remove one architectural boundary: +- V2 no longer needs to materialize a full `{data: ...}` `astjson` envelope for loader consumption +- the loader can merge directly from the native frame-backed result into the resolver arena +- this becomes the base for future direct final-write work + +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` +- `v2/pkg/engine/resolve/datasource.go` +- `v2/pkg/engine/resolve/loader.go` +- `v2/pkg/engine/resolve/resolve.go` +- `v2/pkg/engine/resolve/resolve_test.go` + +Commands run: +- added red resolve test for loader preference: + - `cd v2 && go test ./pkg/engine/resolve -run 'TestResolver_ArenaResolveGraphQLResponse_PrefersNativeMergeDataSourceAndCallsCleanup' -count=1` +- added red datasource test for V2 merge-result contract: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_LoadResult_ResolveMatchesLoadAndLoadValue' -count=1` +- full package verification: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource ./pkg/engine/resolve` +- synthetic resolve seam benchmark: + - `cd v2 && go test ./pkg/engine/resolve -run '^$' -bench '^BenchmarkResolver_ArenaResolveGraphQLResponse_NativeBoundary$' -benchmem -count=3` +- real V2 frame benchmarks: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^$' -bench '^(Benchmark_DataSource_V2_Load(Value|Result)(_WithFieldArguments|_FederationRequiresUnion)?)$' -benchmem -count=3` + +Baseline before stage: +- Stage 28 left V2 still crossing the loader boundary through `LoadValue` and a materialized `astjson` envelope. 
+- There was no native merge-result contract between datasource and loader. + +Result after stage: +- The resolve layer now has an additive `NativeMergeDataSource` / `NativeMergeResult` contract. +- Loader prefers native merge results over `LoadValue` when available and defers cleanup correctly until after response writing. +- `grpc_datasource_v2` now exposes `LoadResult` for native success paths and returns a frame-backed `v2NativeMergeResult`. +- New coverage proves both sides: + - loader prefers `LoadResult` and does not fall back to `LoadValue` + - V2 `LoadResult` matches `Load` and `LoadValue` output for the resolver-heavy benchmark query + +Measured benchmark signal: +- Synthetic resolve seam benchmark with fake datasources: + - `native_value`: `1066-1091 ns/op`, `2203-2204 B/op`, `34 allocs/op` + - `native_merge`: `1155-1223 ns/op`, `2259-2260 B/op`, `37 allocs/op` +- Real V2 pooled-arena comparison: + - simple path: + - `LoadValue`: `21913-24199 ns/op`, `11424-11427 B/op`, `226 allocs/op` + - `LoadResult`: `23915-24886 ns/op`, `11503-11508 B/op`, `229 allocs/op` + - field-args path: + - `LoadValue`: `68157-72812 ns/op`, `48996-49022 B/op`, `964 allocs/op` + - `LoadResult`: `68804-71900 ns/op`, `49096-49216 B/op`, `967 allocs/op` + - federation requires+union path: + - `LoadValue`: `63621-66266 ns/op`, `41460-41461 B/op`, `788 allocs/op` + - `LoadResult`: `64650-68670 ns/op`, `41617-41621 B/op`, `791 allocs/op` + +What worked: +- The architectural seam is now real and additive. Other datasources are unaffected. +- Cleanup ownership is explicit across datasource -> loader -> resolver. +- Native V2 success paths can now hand off a frame-backed result without first materializing a response envelope value. 
+- This is the right boundary for future work: + - direct final-write from V2 frames + - merge-time specialization that avoids `astjson.MergeValuesWithPath` + - loader-side native handling for batch fan-out without subtree reification + +What did not work: +- The first benchmark signal is not a win yet. +- The synthetic resolve benchmark is misleading for real V2 because the fake merge result still builds ordinary `astjson` values, so it mostly measures interface overhead. +- The real V2 datasource-level comparison is roughly neutral to slightly worse for `LoadResult + MergeInto` than `LoadValue`. +- In other words, deleting the boundary alone is not enough. The merge side still pays for generic `astjson` materialization and path-based merging, so the new seam is currently an enabler more than an isolated speedup. + +Useful conclusions: +- This stage should not be judged as a standalone throughput optimization. +- It should be judged as a prerequisite architectural step that makes the next radical work possible on the correct side of the boundary. +- The next meaningful optimization must now exploit this seam: + - specialize merge from frame nodes into resolver targets + - or bypass `astjson` entirely for native V2 final writing in the resolver path + +Decision: +- keep + +## Stage 28: Native Oneof / Fragment Response Execution + +Goal: +Delete the next federation fallback wall by making V2 compile and execute fragment-driven oneof response messages natively, especially on queries that combine `@requires` with union/interface resolver output. + +Hypothesis: +If V2 can compile response programs for oneof-backed protobuf wrapper messages and dispatch fragment materialization based on the active branch at runtime, then federation queries like `tagSummary + storageStatus` should stop falling back and produce another structural runtime drop. 
+ +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- added red test for native compilation of a federation `@requires + union resolver` query +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_CompilesNativeProgramForFederationRequiresAndUnionResolve' -count=1` +- implemented oneof/fragment response compilation and runtime dispatch +- added parity test for native load/loadValue on the same query +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_CompilesNativeProgramForFederationRequiresAndUnionResolve|TestDataSourceV2_LoadValue_FederationRequiresAndUnionResolveMatchesLoad' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource ./pkg/engine/resolve` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_V1_Load_FederationRequiresUnion|Benchmark_DataSource_V2_Load_FederationRequiresUnion|Benchmark_DataSource_V2_LoadValue_FederationRequiresUnion)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- The new red test failed with: + - `fallback reasons: [call 1 (ResolveStorageStorageStatus): oneof or fragment-driven response messages are not yet supported natively]` +- No benchmark existed yet for this exact path, but the expected comparison target was a fallback-heavy federation path similar to the pre-Stage-27 ceiling. + +Result after stage: +- The federation `@requires + union resolver` query now compiles natively in V2. 
+- New repeated measurements: + - `Benchmark_DataSource_V1_Load_FederationRequiresUnion`: `93396-102076 ns/op`, `72637-72774 B/op`, `1216-1217 allocs/op` + - `Benchmark_DataSource_V2_Load_FederationRequiresUnion`: `67333-71486 ns/op`, `42485-42504 B/op`, `794-795 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_FederationRequiresUnion`: `66232-70917 ns/op`, `41467-41474 B/op`, `787-788 allocs/op` + +What worked: +- V2 response programs now support oneof/fragment-driven response shapes by: + - compiling per-concrete-type fragment programs + - detecting the active oneof branch at runtime + - materializing the matching fragment fields into the final response object +- The targeted query is now fully native and parity-tested. +- This is another structural improvement, not a micro-win: + - V2 byte path beats V1 by roughly `26-35 us/op` + - bytes/op drop by roughly `30 KB` + - allocs/op drop by roughly `420` + - native `LoadValue` trims another small layer beyond that + +What did not work: +- The response compiler still does not support every possible fragment-driven shape. This stage specifically unlocked the oneof-backed resolver output used by the benchmarked federation query. +- Common fields on interface/union selections are not yet benchmarked explicitly. The current implementation is strongest where selection is mostly fragment-driven. +- This stage broadens native response coverage, but it does not yet address the deeper loader-side `astjson` materialization boundary. + +Useful conclusions: +- The next remaining coverage barrier is no longer plain federation or simple oneof response support. The major native coverage classes are expanding quickly now. 
+- The strongest next radical step is likely one of: + - broader interface/union coverage with common-field handling + - or the loader/render boundary that still forces native V2 responses through `astjson` + +Decision: +- keep + +## Stage 21: Native V2 Response Frame Arena + +Goal: +Delete `astjson` from the native V2 success path and replace it with a compiled response-frame runtime that stores only response slots and serializes the final GraphQL payload directly. + +Hypothesis: +If native V2 stops building `astjson.Value` trees and instead writes into a compact frame graph with direct JSON serialization, the heavy benchmark should drop further on bytes/op and allocs/op, and the remaining response-side cost should become small enough that protobuf and context extraction dominate. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- added red test for response-frame data-envelope serialization +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2ResponseFrameBuilder_MarshalDataEnvelope' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2ResponseFrameBuilder_MarshalDataEnvelope|TestDataSourceV2_' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_V1_Load|Benchmark_DataSource_V2_Load|Benchmark_DataSource_V1_Load_WithFieldArguments|Benchmark_DataSource_V2_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_V2_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-v2-frame.cpu.out -memprofile /tmp/grpc-ds-v2-frame.mem.out -memprofilerate=1 -cpu=1 
./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go tool pprof -top /tmp/grpc-ds-v2-frame.cpu.out` +- `cd v2 && go tool pprof -top /tmp/grpc-ds-v2-frame.mem.out` + +Baseline before stage: +- Stable reference before this stage: + - `Benchmark_DataSource_V2_Load`: `23098-24247 ns/op`, `12137-12142 B/op`, `240 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `68082-72125 ns/op`, `50678-50689 B/op`, `1022 allocs/op` + +Result after stage: +- First frame-writer cut, without request-local reuse: + - `Benchmark_DataSource_V2_Load`: `24245-25115 ns/op`, `14278-14280 B/op`, `230 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `69826-73300 ns/op`, `51583-51594 B/op`, `980 allocs/op` +- Final kept version, after converting the frame writer into a reusable request-local arena: + - `Benchmark_DataSource_V2_Load`: `27802-29088 ns/op`, `11492-11496 B/op`, `225 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `69281-73091 ns/op`, `47844-47864 B/op`, `955 allocs/op` + +What worked: +- Native V2 no longer builds `astjson.Value` trees on the success path. +- Resolver and standard fetches now attach into an index-based response frame and serialize once at the end as `{"data":...}`. +- The second step of the stage, turning the frame graph into a reusable request-local arena, was necessary and paid off: + - heavy benchmark bytes/op dropped from about `50.7 KB` to about `47.8 KB` + - heavy benchmark allocs/op dropped from `1022` to `955` + - simple benchmark bytes/op dropped from about `12.1 KB` to about `11.5 KB` + - simple benchmark allocs/op dropped from `240` to `225` +- Relative to V1, the native V2 heavy path is now materially leaner: + - `~69-73 us/op` vs `~107-119 us/op` + - `~47.8 KB/op` vs `~83.8 KB/op` + - `955 allocs/op` vs `1483-1485 allocs/op` +- Reprofile after the arena conversion shows the response side is no longer a major structural blocker. 
Package-side hotspots are now narrower: + - `v2ContextProgram.extractRows` + - `v2ResolvePathProgram.extractFromMessage` + - `v2ResponseFrameBuilder.appendNodeJSON` + - protobuf unmarshal and gRPC transport remain larger than the V2 response writer itself + +What did not work: +- The first frame-writer implementation was not good enough by itself. It reduced alloc count but regressed both CPU and bytes/op because it still allocated fresh node slices every request. +- Even after the arena conversion, the simple benchmark regressed on CPU versus the previous Stage 20 implementation. This is the main cost of the new architecture today. +- The new serializer now pays visible cost in quoted field-name and string emission (`strconv.AppendQuote` / `appendQuotedWith` shows up in alloc-space profiles). +- The remaining V2 ceiling is now concentrated in: + - resolver context extraction and path walking + - protobuf unmarshal / gRPC transport + - string-heavy final serialization rather than object-tree construction + +Decision: +- keep +- follow up with a more aggressive compiled serializer and a lower-allocation resolve-context runtime + +## V2 Breakthrough Checklist + +- [x] 1. Add an optional resolve interface for arena-rooted datasource values so loaders can consume native results without forcing every datasource off the byte contract. +- [x] 2. Move `grpc_datasource_v2` off final byte serialization for native paths and hand native values directly to the loader with explicit cleanup. +- [x] 3. Integrate a dynamic-schema decode backend in V2 that can replace reflective output decode on runtime-loaded schemas. +- [x] 4. Add backend-aware output allocation in V2 so the compiler/runtime can skip allocations that the decode backend will replace. +- [x] 5. Attack request encoding with a true V2 wire-format request path, keeping fallback for unsupported shapes. +- [x] 6. 
Add honest end-to-end happy-path and federated fan-out benchmarks for V2 so the next ceiling is measured correctly. + +## Stage 22: Optional Native Loader Contract + +Goal: +Create the first boundary needed for the next architectural jump: let loaders consume an arena-rooted datasource value directly, while preserving the existing byte-returning `DataSource` interface for compatibility. + +Hypothesis: +If the new contract is additive instead of breaking, we can land the loader boundary first, verify lifecycle correctness, and then move `grpc_datasource_v2` onto it in the next stage without destabilizing the rest of the engine. + +Files touched: +- `v2/pkg/engine/resolve/datasource.go` +- `v2/pkg/engine/resolve/loader.go` +- `v2/pkg/engine/resolve/resolve.go` +- `v2/pkg/engine/resolve/resolve_test.go` + +Commands run: +- added red test for native-value datasource preference and cleanup lifecycle +- `cd v2 && go test ./pkg/engine/resolve -run 'TestResolver_ArenaResolveGraphQLResponse_UsesNativeValueDataSourceAndCallsCleanup' -count=1` +- `cd v2 && go test ./pkg/engine/resolve` + +Baseline before stage: +- Loader only knew how to consume datasource bytes. +- `DataSource` had a single contract: `Load(...) ([]byte, error)`. +- Any datasource-native response graph had to be serialized and reparsed before merge. + +Result after stage: +- `resolve.NativeDataSource` now exists as an additive interface with `LoadValue` / `LoadWithFilesValue`. +- `Loader` prefers the native interface when available and keeps the old byte path as fallback. +- `Loader` now tracks datasource cleanup callbacks and runs them in `Loader.Free()`. +- `Resolver` now explicitly calls `t.loader.Free()` after each top-level GraphQL response resolution. +- Focused lifecycle test passes, and the full `resolve` package is green. + +What worked: +- The contract change stayed additive, so existing datasources and generated gomocks did not need a repo-wide rewrite. 
+- The red test proved the exact behavior we need for the next stage: + - the loader uses the native value path instead of the legacy byte path + - cleanup is called exactly once after response writing has finished +- Making `Loader.Free()` real was the correct lifecycle point because it sits after resolution/output consumption, not during merge. + +What did not work: +- The first implementation dropped the shared `err` variable in `mergeResult`; fixed immediately once the build failed. +- This stage does not improve benchmarks by itself. It only opens the architectural seam needed for the larger V2 changes. + +Decision: +- keep +- step 1 complete +- move to step 2 next + +## Stage 23: Native `grpc_datasource_v2` Value Handoff + +Goal: +Move native V2 execution off the final byte boundary so the loader can consume datasource-owned values directly and only the legacy byte interface pays the final marshal step. + +Hypothesis: +If V2 exposes `LoadValue` natively, the new loader contract from Stage 22 can start paying off immediately: +- native callers stop forcing final byte serialization +- cleanup stays explicit and correctly scoped +- the old `Load` contract remains intact by layering byte marshaling on top of `LoadValue` + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- added red test for native V2 value loading parity +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_LoadValue_ResolveMatchesLoad' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource ./pkg/engine/resolve` + +Baseline before stage: +- `grpc_datasource_v2` still returned bytes only. +- Even native V2 execution paid a final `frame -> bytes` conversion before the loader could consume the result. 
+ +Result after stage: +- `DataSourceV2` now implements both `resolve.DataSource` and `resolve.NativeDataSource`. +- `LoadValue` / `LoadWithFilesValue` return datasource-owned `*astjson.Value` trees with explicit cleanup. +- `Load` is now layered on top of `LoadValue` for compatibility. +- Focused parity test for resolver-heavy native execution is green. + +What worked: +- The boundary is now real. Native callers can stay on value objects without forcing a final byte round trip. +- Compatibility stayed simple because the old `Load` path is still available and now reuses the native implementation rather than duplicating logic. +- Cleanup ownership is explicit and aligned with the loader lifecycle introduced in Stage 22. + +What did not work: +- This is not yet a zero-copy final handoff. Native V2 still materializes an `astjson.Value` tree from the response frame for the native contract. +- No isolated benchmark was taken for this stage alone; the measured impact shows up later in Stage 26. + +Decision: +- keep +- step 2 complete + +## Stage 24: Dynamic Decode Backend And Backend-Aware Output Allocation + +Goal: +Replace reflective dynamic output decode in V2 with a real runtime backend for dynamic schemas and teach the runtime to allocate output containers according to the backend in use. + +Hypothesis: +If V2 can decode runtime-loaded schemas with `hyperpb` instead of generic reflective messages, then dynamic-schema execution will stop paying the worst dynamic decode overhead. If output allocation becomes backend-aware at the same time, the runtime can stop allocating containers that the decode backend will immediately replace. 
+ +Files touched: +- `v2/go.mod` +- `v2/go.sum` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_hyperpb.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- `cd v2 && go get buf.build/go/hyperpb@v0.1.3` +- added red test for dynamic decode message allocation +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2MessageRuntime_NewDecodeMessage_UsesHyperpbWhenGeneratedTypeMissing' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2MessageRuntime_NewDecodeMessage_UsesHyperpbWhenGeneratedTypeMissing|TestDataSourceV2_LoadValue_ResolveMatchesLoad' -count=1` + +Baseline before stage: +- V2 still fell back to generic reflective decode for runtime-loaded schemas. +- Output allocation did not yet understand that different backends own different output containers. + +Result after stage: +- V2 now compiles and stores a `hyperpb` message type for every schema runtime message. +- Runtime output allocation is backend-aware: + - generated schema path uses generated protobuf messages + - dynamic schema path can allocate `hyperpb` messages with per-call shared ownership + - generic reflective allocation remains as the final fallback +- gRPC invocation now routes through a codec that can unmarshal directly into `hyperpb` messages. + +What worked: +- Dynamic schemas are now handled by a real decode backend from day one instead of only by the reflective fallback. +- Backend-aware output ownership made the change structurally correct rather than bolting `hyperpb` onto the side. +- The new decode backend integrates cleanly with both native `LoadValue` and legacy `Load`. + +What did not work: +- This stage does not remove all reflective behavior. 
Generated and unsupported paths still coexist with generic fallbacks. +- `hyperpb` types are compiled eagerly as part of schema runtime construction, which adds cold-path setup cost in exchange for hot-path speed. +- The benchmark effect was validated later as part of the full step-6 measurement pass rather than as a standalone stage benchmark. + +Decision: +- keep +- steps 3 and 4 complete + +## Stage 25: True V2 Wire-Format Request Path + +Goal: +Delete protobuf request marshaling work for the subset of request shapes already represented in the V2 IR by emitting protobuf wire bytes directly from the compiled request program. + +Hypothesis: +If V2 can compile request programs into a wire plan, then supported request shapes can bypass message marshaling entirely: +- no request-side protobuf object materialization +- lower allocation pressure on the input side +- clean fallback to the existing protobuf-message build when a shape is not yet supported + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_hyperpb.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_wire.go` + +Commands run: +- added red test for direct wire-plan request input generation +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2RequestProgram_BuildInput_UsesWirePlanForNestedRequest' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestV2RequestProgram_BuildInput_UsesWirePlanForNestedRequest|TestV2MessageRuntime_NewDecodeMessage_UsesHyperpbWhenGeneratedTypeMissing|TestDataSourceV2_LoadValue_ResolveMatchesLoad' -count=1` + +Baseline before stage: 
+- V2 still built protobuf request messages and then marshaled them even for request shapes already fully known to the compiled IR. + +Result after stage: +- `v2RequestProgram` can now compile a direct wire plan for supported shapes. +- Supported request programs return a pre-marshaled protobuf input object instead of building a protobuf message. +- The gRPC codec reuses those bytes directly. +- Unsupported shapes still fall back to the existing protobuf-message build path. + +What worked: +- The request path now has a real architectural escape hatch from protobuf message marshaling. +- The implementation stays safe because unsupported features do not try to force themselves through the wire encoder. +- Nested message input works, and the focused wire-plan test is green. + +What did not work: +- The wire plan is intentionally incomplete: + - no enum input support + - no resolve-context request path + - unsupported shapes fall back +- Repeated numeric fields currently use a simple repeated-field encoding rather than packed canonical encoding. It is valid, but not yet the final polished encoder. +- Like Stage 24, the benchmark effect is measured in the full pass later rather than from an isolated pre/post run here. + +Decision: +- keep +- step 5 complete + +## Stage 26: Honest Native-Value And Federation Fan-Out Benchmarks + +Goal: +Measure the actual post-Stage-25 V2 shape instead of only the legacy byte-returning path. Add native `LoadValue` benchmarks and a federation fan-out benchmark so the next ceiling is visible. 
+ +Hypothesis: +If the new native boundary and request/decode changes are real, then: +- `LoadValue` should show a modest but measurable win over `Load` on native V2 paths +- federation/entity fan-out should expose whether V2 is still paying legacy byte/fallback overhead + +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- added red parity test for federation fan-out native value loading +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_LoadValue_FederationFanoutMatchesLoad' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_LoadValue_ResolveMatchesLoad|TestDataSourceV2_LoadValue_FederationFanoutMatchesLoad' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource ./pkg/engine/resolve` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_V1_Load|Benchmark_DataSource_V2_Load|Benchmark_DataSource_V2_LoadValue|Benchmark_DataSource_V1_Load_WithFieldArguments|Benchmark_DataSource_V2_Load_WithFieldArguments|Benchmark_DataSource_V2_LoadValue_WithFieldArguments|Benchmark_DataSource_V1_Load_FederationFanout|Benchmark_DataSource_V2_Load_FederationFanout|Benchmark_DataSource_V2_LoadValue_FederationFanout)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- V2 benchmark coverage only measured the legacy byte-returning interface for the simple and resolver-heavy query shapes. +- There was no benchmark for the new native loader/value boundary. +- There was no benchmark for federation/entity resolver fan-out. + +Result after stage: +- Added three new benchmark surfaces: + - `Benchmark_DataSource_V2_LoadValue` + - `Benchmark_DataSource_V2_LoadValue_WithFieldArguments` + - `Benchmark_DataSource_V2_LoadValue_FederationFanout` +- Added V1/V2 federation fan-out byte-path benchmarks. 
+- Fresh repeated measurements: + - `Benchmark_DataSource_V1_Load`: `33743-36344 ns/op`, `14912-14943 B/op`, `286 allocs/op` + - `Benchmark_DataSource_V2_Load`: `24206-27385 ns/op`, `11548-11557 B/op`, `230 allocs/op` + - `Benchmark_DataSource_V2_LoadValue`: `23868-25793 ns/op`, `11292-11297 B/op`, `225 allocs/op` + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `104360-109145 ns/op`, `83902-83937 B/op`, `1487 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `70428-73123 ns/op`, `49864-49876 B/op`, `970 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_WithFieldArguments`: `69844-73502 ns/op`, `48840-48854 B/op`, `963 allocs/op` + - `Benchmark_DataSource_V1_Load_FederationFanout`: `72981-75632 ns/op`, `43381-43404 B/op`, `756 allocs/op` + - `Benchmark_DataSource_V2_Load_FederationFanout`: `74285-77329 ns/op`, `44489-44490 B/op`, `764 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_FederationFanout`: `74155-76847 ns/op`, `43470-43477 B/op`, `757 allocs/op` + +What worked: +- The native value boundary is real and measurable: + - simple native path saves about `250 B/op` and `5 allocs/op` versus V2 `Load` + - resolver-heavy native path saves about `1.0 KB/op` and `7 allocs/op` versus V2 `Load` +- V2 remains materially ahead of V1 on the native happy-path benchmarks. +- The new federation fan-out benchmark exposed a real architectural truth: + - V2 `Load` is still paying extra overhead on this path + - V2 `LoadValue` removes most of that memory/alloc gap immediately +- The parity test proves the new native federation path returns the same data as the byte path. + +What did not work: +- Federation fan-out is not yet a breakthrough case for V2 on CPU. On the byte path it is slightly slower and slightly fatter than V1. +- The native value handoff helps federation memory more than CPU because the current fan-out path still leans on fallback execution and legacy behavior preservation. 
+- The native `LoadValue` benchmarks are still end-to-end datasource benchmarks, not full-engine resolver benchmarks. They expose the datasource boundary better, but not the whole resolver pipeline. + +Useful conclusions: +- The next ceiling is now sharply defined: + - native V2 hot paths benefit from the new boundary and runtime + - fallback/federation-heavy paths still carry too much legacy cost +- The next radical step should target one of: + - extending native V2 coverage deeper into federation/entity execution + - deleting more fallback serialization work on mixed-mode paths + +Decision: +- keep +- step 6 complete + +## V2 Breakthrough Status + +Current measured V2 shape after completing the checklist: + +- Native happy path: + - `Benchmark_DataSource_V2_Load`: `24206-27385 ns/op`, `11548-11557 B/op`, `230 allocs/op` + - `Benchmark_DataSource_V2_LoadValue`: `23868-25793 ns/op`, `11292-11297 B/op`, `225 allocs/op` +- Resolver-heavy path: + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `70428-73123 ns/op`, `49864-49876 B/op`, `970 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_WithFieldArguments`: `69844-73502 ns/op`, `48840-48854 B/op`, `963 allocs/op` +- Federation fan-out: + - `Benchmark_DataSource_V1_Load_FederationFanout`: `72981-75632 ns/op`, `43381-43404 B/op`, `756 allocs/op` + - `Benchmark_DataSource_V2_Load_FederationFanout`: `74285-77329 ns/op`, `44489-44490 B/op`, `764 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_FederationFanout`: `74155-76847 ns/op`, `43470-43477 B/op`, `757 allocs/op` + +Interpretation: + +- The checklist work landed real wins. V2 now has: + - a native value boundary + - a dynamic decode backend + - backend-aware output ownership + - a true wire-format request fast path for supported input shapes + - benchmark coverage that can see the native boundary and federation fan-out ceiling +- The native value handoff is worth keeping, but it is not itself the next breakthrough. 
It removes a measurable thin layer; it does not solve mixed-mode fallback cost. +- Federation fan-out is now the cleanest evidence of where the architecture still needs radical work: native happy paths are ahead, but mixed-mode/entity paths still preserve too much of the old datasource behavior. + +## Stage 27: Native Federation Entity Execution + +Goal: +Delete the fallback wall on the federation benchmark path by making V2 compile and execute entity-rooted fan-out plans natively instead of routing `_entities` plus resolver arguments back through v1. + +Hypothesis: +If V2 can natively compile: +- `CallKindEntity` root fetches +- enum-valued request arguments on the generic request path +- optional scalar wrapper request fields +and validate federated entity counts without leaving the native runtime, then the federation benchmark should stop looking like a mixed-mode fallback path and collapse toward the native happy-path cost shape. + +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- added red test for native federation fan-out compilation +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_CompilesNativeProgramForFederationFanout' -count=1` +- fixed compile/runtime blockers iteratively: + - enum request fields + - optional scalar wrapper request fields + - static `__typename` federation validation +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2_CompilesNativeProgramForFederationFanout|TestDataSourceV2_LoadValue_FederationFanoutMatchesLoad' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource ./pkg/engine/resolve` +- `cd v2 && go test -count=3 -run '^$' -bench 
'^(Benchmark_DataSource_V1_Load_FederationFanout|Benchmark_DataSource_V2_Load_FederationFanout|Benchmark_DataSource_V2_LoadValue_FederationFanout)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- From Stage 26: + - `Benchmark_DataSource_V1_Load_FederationFanout`: `72981-75632 ns/op`, `43381-43404 B/op`, `756 allocs/op` + - `Benchmark_DataSource_V2_Load_FederationFanout`: `74285-77329 ns/op`, `44489-44490 B/op`, `764 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_FederationFanout`: `74155-76847 ns/op`, `43470-43477 B/op`, `757 allocs/op` +- Native V2 still treated the federation fan-out benchmark as a fallback-heavy path because: + - `CallKindEntity` was not compiled natively + - enum request fields were rejected + - optional scalar wrapper request fields were rejected + +Result after stage: +- Fresh repeated measurements: + - `Benchmark_DataSource_V1_Load_FederationFanout`: `71409-77628 ns/op`, `43405-43503 B/op`, `756 allocs/op` + - `Benchmark_DataSource_V2_Load_FederationFanout`: `32076-34591 ns/op`, `16043-16045 B/op`, `299 allocs/op` + - `Benchmark_DataSource_V2_LoadValue_FederationFanout`: `31697-33493 ns/op`, `15785-15786 B/op`, `294 allocs/op` + +What worked: +- The federation benchmark path now compiles natively in V2: + - the new red test proves `nativeOperation == true` + - the benchmark-shaped plan compiles as `Entity -> Resolve` without fallback +- The generic V2 request path is materially stronger now: + - enum request values are supported natively + - optional scalar wrapper request fields are supported natively +- Federation validation stayed behavior-preserving by adding native entity-count validation for V2 entity responses, including static `__typename` handling. 
+- The benchmark result is a real architectural breakthrough: + - V2 federation byte path dropped from roughly `74-77 us` to `32-35 us` + - bytes/op dropped from roughly `44.5 KB` to `16.0 KB` + - allocs/op dropped from `764` to `299` + - native `LoadValue` trims another small layer on top: + - about `250 B/op` + - about `5 allocs/op` + +What did not work: +- The first implementation only lifted `Entity`/`Required` kind routing and was not enough; the red test stayed red because enum request inputs were still rejected. +- After enum support, the path still failed because optional scalar wrappers were still compile-time fallbacks. +- Federation validation initially crashed on static `__typename` because the new validator assumed every typename had a field runtime; that had to be fixed explicitly. +- This stage is benchmarked only on the federation fan-out surface so far. It should be included in the next full V1/V2 sweep before broader conclusions are locked in. + +Useful conclusions: +- The prior “federation is still the cleanest evidence of the remaining ceiling” statement is no longer true for this benchmark shape. This stage deleted that ceiling. +- The next radical target has moved again: + - either broader federation/entity coverage beyond this benchmark shape + - or the remaining generic response/value materialization cost on already-native paths + +Decision: +- keep + +## Stage 20: Direct Response Attachment In `grpc_datasource_v2` + +Goal: +Delete the remaining `response -> astjson subtree -> merge` execution shape from native v2. Keep the protobuf outputs from each fetch, then attach them directly into the final response tree with compiled response programs. 
+ +Hypothesis: +If native v2 stops materializing per-fetch response objects and instead: +- writes root standard responses directly onto the final root object +- resolves resolver targets once from `ResponsePath` +- attaches row-aligned resolver values directly onto those target objects +then the remaining response-side allocation pressure should drop again, especially on the resolver-heavy benchmark. + +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench 'Benchmark_DataSource_(V1|V2)_' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_V2_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-v2-direct-attach.cpu.out -memprofile /tmp/grpc-ds-v2-direct-attach.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -top /tmp/grpc-ds-v2-direct-attach.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-v2-direct-attach.mem.out` + +Baseline before stage: +- Stage 19 comparison: + - `Benchmark_DataSource_V1_Load`: `29169-30904 ns/op`, `14895-14920 B/op`, `286 allocs/op` + - `Benchmark_DataSource_V2_Load`: `24780-26531 ns/op`, `12261-12272 B/op`, `243 allocs/op` + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `104227-113210 ns/op`, `83924-83962 B/op`, `1487 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `69682-75128 ns/op`, `52289-52296 B/op`, `1063 allocs/op` +- At that point, v2 still built intermediate `astjson` objects for each fetch and then merged them, even though the protobuf runtime was already 
much faster. + +What changed architecturally: +- The kernel no longer stores intermediate JSON responses for native fetches. +- Each fetch goroutine now returns only the protobuf output plus fetch metadata. +- Response programs gained direct-attach methods: + - apply object fields directly to the root tree + - resolve resolver targets from `ResponsePath` + - materialize only the exact value being attached to each target field +- The old `write + mergeValues/mergeWithPath` path is gone from native v2 execution. + +What worked: +- The simple benchmark improved again: + - v1: `28271-29450 ns/op`, `14891-14915 B/op`, `286 allocs/op` + - v2: `23098-24247 ns/op`, `12137-12142 B/op`, `240 allocs/op` +- The heavy benchmark improved again too: + - v1: `101742-105839 ns/op`, `83931-83972 B/op`, `1487-1488 allocs/op` + - v2: `68082-72125 ns/op`, `50678-50689 B/op`, `1022 allocs/op` +- Compared with the previous stage, the heavy benchmark dropped by roughly: + - `1.6-3.0 us/op` + - about `1.6 KB/op` + - about `41 allocs/op` +- Profiling confirms the architectural intent: + - the old subtree-building/merge path is no longer the main package-side shape + - remaining package costs are narrower and more local: + - `v2ContextProgram.extractRows` + - `v2ResolvePathProgram.extractFromMessage` + - `v2ResponseFieldProgram.materialize` + - `v2ResponseProgram.attachResolve` + +What did not work: +- This stage did not eliminate `astjson` allocations entirely. The final response tree still uses `astjson`, so the next ceiling is now the cost of materializing the exact attached values rather than whole fetch subtrees. +- The profile still shows meaningful transport and protobuf unmarshal cost on the heavy benchmark. +- The benchmark harness itself contributes noticeable server-side allocation noise (`createSubcategories`, mock service response construction). + +Useful conclusions: +- The response-side architectural rewrite was worth doing. 
The old subtree model was still materially expensive even after the generated protobuf runtime landed. +- The remaining package-side cost is now much more focused: + - resolver context row extraction + - scalar/object value materialization for direct attach +- The next radical move should likely target one of two things: + - a lower-allocation context extraction model + - a response writer that bypasses `astjson` object/value construction even further + +Decision: +- keep + +## Stage 19: Generated-Message Runtime Backend For `grpc_datasource_v2` + +Goal: +Replace `dynamicpb` as the primary hot-path message container in v2 when generated Go protobuf types are linked. Keep dynamic schemas working from day one by preserving `dynamicpb` as the fallback backend. + +Hypothesis: +If the v2 schema runtime can allocate generated protobuf messages for request roots, response roots, and resolver-context rows, then the largest remaining protobuf runtime costs should fall sharply: +- fewer allocations during request build and response unmarshal +- lower bytes/op on the resolver-heavy benchmark +- materially better CPU due to generated marshal/unmarshal paths + +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench 'Benchmark_DataSource_(V1|V2)_' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_V2_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-v2-generated.cpu.out -memprofile /tmp/grpc-ds-v2-generated.mem.out 
-memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -top /tmp/grpc-ds-v2-generated.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-v2-generated.mem.out` + +Baseline before stage: +- Stage 18 comparison: + - `Benchmark_DataSource_V1_Load`: `28630-29491 ns/op`, `14894-14918 B/op`, `286 allocs/op` + - `Benchmark_DataSource_V2_Load`: `25662-27261 ns/op`, `15008-15012 B/op`, `291 allocs/op` + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `102533-110449 ns/op`, `83873-83975 B/op`, `1485-1488 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `95456-112489 ns/op`, `81777-81797 B/op`, `1526-1528 allocs/op` +- At that point, the heavy path was inside v2, but it was still paying for `dynamicpb` object creation and generic protobuf runtime overhead. + +What changed architecturally: +- `v2MessageRuntime` now has two real backends: + - generated message types when linked + - `dynamicpb` otherwise +- Each compiled field runtime now carries both descriptor families: + - compiler/dynamic descriptor + - generated descriptor +- Field access and mutation choose the correct descriptor at runtime based on the actual message backend. +- The gRPC boundary now passes concrete `proto.Message` values via `protoreflect.Message.Interface()` while keeping the reflective handle internally. + +What worked: +- This is the first real protobuf-runtime breakthrough in v2. +- The heavy benchmark dropped sharply: + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `104227-113210 ns/op`, `83924-83962 B/op`, `1487 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `69682-75128 ns/op`, `52289-52296 B/op`, `1063 allocs/op` +- The simple benchmark improved substantially too: + - v1: `29169-30904 ns/op`, `14895-14920 B/op`, `286 allocs/op` + - v2: `24780-26531 ns/op`, `12261-12272 B/op`, `243 allocs/op` +- That means this stage simultaneously improved CPU, bytes/op, and alloc count on both comparison workloads. 
+- The v2 engine now genuinely differentiates between generated-linked schemas and runtime-only schemas without sacrificing dynamic-schema support. + +What did not work: +- The generated path was not a drop-in replacement. Descriptor identity differs between the compiler’s protobuf descriptors and the generated descriptors, so the runtime had to be redesigned to carry both descriptor families explicitly. +- The remaining hot path is still not “ultra high performance” yet. Profiling shows the next package-side costs are now: + - resolver context extraction (`v2ContextProgram.extractRows`, `v2ResolvePathProgram.extractFromMessage`) + - JSON object assembly and merge (`astjson`, `jsonBuilder`) + - server-side mock/response allocations in the benchmark harness + +Useful conclusions: +- Generated-message execution belongs in the architecture, not as an optional micro-fast-path. It materially changes the runtime profile. +- The v2 engine is now structurally in the right place for the next radical move: the protobuf backend is no longer the main blocker for linked schemas. +- The next serious ceiling is now the response pipeline and the context-extraction row model, not basic protobuf message allocation. + +Decision: +- keep + +## Stage 18: Native Resolve Calls In `grpc_datasource_v2` + +Goal: +Push the new v2 engine past root-only fetches and make the benchmark-dominant resolver path execute natively. The target is dependency-driven `CallKindResolve` execution with compiled context extraction from prior protobuf outputs. + +Hypothesis: +If v2 stops falling back to v1 for field resolvers and instead: +- keeps prior stage protobuf outputs in kernel state +- compiles resolve-context extraction into path programs +- builds resolve requests directly from dependency outputs plus `field_args` +- merges resolver `result` payloads by `ResponsePath` +then the heavy benchmark should move meaningfully because the old dependency graph/compiler path disappears from execution. 
+ +Files touched: +- `IMPROVEMENTS.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench 'Benchmark_DataSource_(V1|V2)_' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_V2_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-v2-resolve.cpu.out -memprofile /tmp/grpc-ds-v2-resolve.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -top /tmp/grpc-ds-v2-resolve.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-v2-resolve.mem.out` + +Baseline before stage: +- Stage 17 comparison: + - `Benchmark_DataSource_V1_Load`: `28810-29932 ns/op`, `14894-14919 B/op`, `286 allocs/op` + - `Benchmark_DataSource_V2_Load`: `26249-27292 ns/op`, `14833-14834 B/op`, `289 allocs/op` + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `103156-107469 ns/op`, `83930-83956 B/op`, `1486-1488 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `102145-105199 ns/op`, `83876-83933 B/op`, `1487-1488 allocs/op` +- In that state, the heavy benchmark was still effectively the v1 path because v2 fell back for resolver fetches. + +What changed architecturally: +- Each v2 fetch now carries dependency IDs. +- V2 runtime keeps prior protobuf outputs by fetch ID so later stages can consume native outputs directly. 
+- Resolve requests now have a dedicated compiled `context` program instead of abusing the standard variable-only request builder. +- Resolve paths are compiled into field-runtime step programs and executed directly against dependency protobuf outputs. +- V2 now merges resolve responses with `mergeWithPath` like v1, instead of treating every fetch as a root merge. + +What worked: +- The heavy benchmark query is now truly native in v2. It no longer has to whole-operation fallback just because the plan contains resolver calls. +- End-to-end parity remains intact: + - the resolver-heavy query now compiles as `nativeOperation == true` + - v1/v2 JSON parity passes + - compiled resolve request construction from dependency output passes +- The heavy benchmark improved on bytes/op and moved into a better CPU range on average: + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `102533-110449 ns/op`, `83873-83975 B/op`, `1485-1488 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `95456-112489 ns/op`, `81777-81797 B/op`, `1526-1528 allocs/op` +- Mean CPU across the 3 runs is modestly better for v2, and the floor is materially better (`95.5 us/op` vs `102.5 us/op`). +- The new stage establishes the right execution architecture for the next major improvements: the dominant resolver path is finally inside the new engine, where it can now be optimized directly. + +What did not work: +- Allocation count got worse in the heavy benchmark, rising from `1485-1488` to `1526-1528 allocs/op`. +- The simple benchmark remained faster on CPU but gave back some bytes/op and allocs: + - v1: `28630-29491 ns/op`, `14894-14918 B/op`, `286 allocs/op` + - v2: `25662-27261 ns/op`, `15008-15012 B/op`, `291 allocs/op` +- Profiling shows the new context path is not free: + - `v2ContextProgram.extractRows` + - `v2ResolvePathProgram.extractFromMessage` + - `dynamicpb.(*Message).Mutable` + are now visible allocation sites on the heavy path. 
+- The real ceiling is still dominated by generic protobuf runtime cost and subtree JSON assembly, not by scheduling anymore. + +Useful conclusions: +- This was the necessary architectural breakpoint. V2 can now execute staged root + resolver plans without dropping back to v1. +- The next round of wins will not come from more graph/compiler work; that layer is now largely gone from the heavy path. +- The next meaningful levers are: + - lower-allocation context row construction + - less generic protobuf message/runtime handling than `dynamicpb` + - direct resolver-value attachment or a final-response writer that avoids more intermediate subtree work + +Decision: +- keep + +## Stage 17: Split The Work Into A New `grpc_datasource_v2` + +Goal: +Stop trying to drag the original datasource toward an ultra-high-performance architecture in place. Revert the v1 package to baseline, preserve the experiment ledger, and introduce a second datasource that can take a fundamentally different route without destabilizing the current engine. 
+ +Hypothesis: +A side-by-side v2 engine will make the next radical work materially easier: +- v1 stays as the correctness baseline +- v2 can introduce a compiled IR/runtime without fitting itself into every old abstraction +- behavior can stay exact by falling back to v1 for unsupported fetches or operations +- benchmark comparisons become clean and repeatable + +Files touched: +- `IMPROVEMENTS.md` +- `docs/superpowers/specs/2026-04-18-grpc-datasource-v2-design.md` +- `docs/superpowers/plans/2026-04-18-grpc-datasource-v2.md` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go` + +Commands run: +- reverted the v1 package back to `HEAD` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'Test(NewDataSourceV2|DataSourceV2)' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench 'Benchmark_DataSource_(V1|V2)_' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- The previous optimization campaign ended at roughly: + - `Benchmark_DataSource_Load`: `1910-1975 ns/op`, `1246-1251 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `84145-87850 ns/op`, `51528-51905 B/op`, `906-907 allocs/op` +- Those changes were intentionally reverted from v1 so the old datasource could become a clean control again. + +What v2 is: +- `DataSourceV2` is a second datasource, not a patch set on v1. 
+- It compiles the existing execution plan into a new compact runtime shape: + - stages + - fetch descriptors + - request programs + - response programs +- It builds a schema runtime from protobuf descriptors for dynamic schemas from day one. +- It keeps exact behavior by routing unsupported operations through the existing `DataSource`. + +What v2 supports natively today: +- standard root fetches with no dependency edges +- compiled request building for supported scalar/message shapes +- compiled response writing for supported scalar/message shapes +- exact whole-operation fallback to v1 for unsupported shapes + +What worked: +- The architectural split is now real and testable. +- We can compare v1 and v2 on the same package without rewriting the existing engine. +- The native v2 path is functionally correct for the first supported operation shape: + - native v1/v2 output parity test passes + - fallback v1/v2 parity test passes +- The new schema runtime works for dynamic schemas from day one by compiling descriptor-backed message tables even when no generated fast path is available. +- The first native comparison already shows a CPU win on the simple standard-fetch path: + - `Benchmark_DataSource_V1_Load`: `28810-29932 ns/op`, `14894-14919 B/op`, `286 allocs/op` + - `Benchmark_DataSource_V2_Load`: `26249-27292 ns/op`, `14833-14834 B/op`, `289 allocs/op` +- The heavy benchmark remains essentially at parity because v2 currently falls back to v1 there: + - `Benchmark_DataSource_V1_Load_WithFieldArguments`: `103156-107469 ns/op`, `83930-83956 B/op`, `1486-1488 allocs/op` + - `Benchmark_DataSource_V2_Load_WithFieldArguments`: `102145-105199 ns/op`, `83876-83933 B/op`, `1487-1488 allocs/op` + +What did not work: +- The first native v2 path is not a breakthrough yet. +- It improves the simple standard-fetch path, but only modestly. +- It still uses `dynamicpb` as the runtime message container, so the hot path has not yet escaped generic protobuf object creation. 
+- The benchmark-dominant resolver path is still handled by v1, so v2 has not attacked the real ceiling yet.
+- The current v2 response path still builds `astjson` trees and merges them at the operation root; it is not yet a true direct final writer.
+
+Useful conclusions:
+- Splitting the engine was the right move. The new compiler/runtime can now evolve independently without destabilizing v1.
+- A compact IR plus fallback is a viable migration strategy for keeping all behavior while radically changing architecture.
+- The next serious breakthrough must come from extending native coverage into dependency-driven and resolver-heavy execution, not from polishing the first root-fetch path.
+- The biggest remaining ceilings are now explicit:
+ - native support for dependent/resolve calls
+ - a less generic message runtime than `dynamicpb`
+ - a final-response writer that avoids intermediate subtree assembly
+
+Decision:
+- keep
+
+## Stage 1: Kernel Boundary And Precompiled Schedule
+
+Goal:
+Move execution ordering out of `Load` and into datasource construction.
+
+Hypothesis:
+If the datasource compiles a fixed execution schedule once, `Load` can stop rebuilding scheduling state on every request and become an executor over precomputed stages.
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments|BenchmarkBuildDependencyGraph)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -sample_index=alloc_space -top ...` + +Baseline before stage: +- `Benchmark_DataSource_Load`: `2319 ns/op`, `1852 B/op`, `30 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `154109 ns/op`, `84956 B/op`, `1488 allocs/op` + +Result after stage: +- `Benchmark_DataSource_Load`: `2168 ns/op`, `1588 B/op`, `30 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `144825 ns/op`, `83671 B/op`, `1485 allocs/op` + +What worked: +- `Load` now executes a kernel program with precompiled stages instead of constructing a dependency graph and sorting it per request. +- Post-change alloc-space profiling no longer shows `NewDependencyGraph` or `TopologicalSortResolve` in the hot path for `Benchmark_DataSource_Load`. +- The stage produced an immediate structural and measurable win. + +What did not work: +- This stage does not yet change the standalone `BenchmarkBuildDependencyGraph` benchmark because that benchmark still exercises the old helper directly. +- It does not materially reduce alloc count by itself because the remaining request-building pipeline is still dynamic and reflective. + +Decision: +- keep + +## Stage 2: Hot-Path Graph And Sort Removal + +Goal: +Finish the transition from per-request graph scheduling to request-local execution state over a precompiled program. + +Hypothesis: +Even if the standalone graph helper remains in the package, removing it from `Load` should lower package-side CPU and alloc space in the real benchmark path. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments|BenchmarkBuildDependencyGraph)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -sample_index=alloc_space -top ...` + +Baseline before stage: +- Same as Stage 1 baseline. + +Result after stage: +- Same as Stage 1 result. This was implemented together with Stage 1 because separating the boundary from the schedule removal would have created an unmeasurable intermediate state. + +What worked: +- The hot path no longer depends on `DependencyGraph`. +- Request-local state is now explicit, which is the foundation for later compiled request/context stages. + +What did not work: +- The legacy graph helper still exists for tests and benchmarks, so graph code is not deleted yet. + +Decision: +- keep + +## Stage 3: Pre-Resolved Call Metadata And Stable Field Lookups + +Goal: +Start turning request construction into a compiled path by pre-resolving per-call metadata and removing copy-returning lookup helpers from the hot path. + +Hypothesis: +If the kernel carries service names and message handles, and field lookups stop returning copies, request construction should spend less time and memory on repeated schema lookups and avoidable heap traffic. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/execution_plan.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments|BenchmarkBuildDependencyGraph)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- `Benchmark_DataSource_Load`: `2168 ns/op`, `1588 B/op`, `30 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `144825 ns/op`, `83671 B/op`, `1485 allocs/op` + +Result after stage: +- `Benchmark_DataSource_Load`: `2074 ns/op`, `1524 B/op`, `28 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `133514 ns/op`, `82613 B/op`, `1472 allocs/op` + +What worked: +- The kernel now pre-resolves service full names and input/output message handles. +- `Message.GetField` now returns stable stored pointers instead of pointer-to-copy results. +- `RPCFields.ByName` now returns the actual slice element instead of a range-variable copy. +- This was the first stage to reduce allocation count in both load benchmarks. + +What did not work: +- This is only the first pass of compiled request construction; the hot path still interprets `RPCMessage` recursively and still uses `dynamicpb` heavily. +- Descriptor-by-name lookups still happen during message construction, so this is not yet the full builder architecture. + +Decision: +- keep + +## Stage 4: Row-Based Resolver Context Batches + +Goal: +Reduce memory pressure in resolver batching by replacing `[]map[string]protoref.Value` with a denser row-based representation. 
+ +Hypothesis: +If resolver context data is collected as ordered rows instead of per-row maps, the field-args benchmark should allocate less and stop showing `resolveContextData` as a significant alloc-space hotspot. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -sample_index=alloc_space -top ...` + +Baseline before stage: +- `Benchmark_DataSource_Load`: `2074 ns/op`, `1524 B/op`, `28 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `133514 ns/op`, `82613 B/op`, `1472 allocs/op` + +Result after stage: +- `Benchmark_DataSource_Load`: `1960 ns/op`, `1524 B/op`, `28 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `135182 ns/op`, `80033 B/op`, `1464 allocs/op` + +What worked: +- `Benchmark_DataSource_Load_WithFieldArguments` dropped another `2580 B/op` and `8 allocs/op`. +- Post-change alloc-space profiling no longer shows `RPCCompiler.resolveContextData` among the top allocation hotspots. +- The simpler benchmark also improved again in latency. + +What did not work: +- The wall-clock delta for the field-args benchmark was slightly worse on this single run, so CPU impact is not yet clearly positive. +- This is still not a fully compiled extractor; path walking and dynamic message access remain in the hot path. + +Decision: +- keep + +## Stage 5: Proto Runtime Boundary Experiment + +Goal: +Introduce a protobuf runtime boundary so later generated or compiled-dynamic backends can be added without another datasource rewrite. + +Hypothesis: +An initial boundary that still uses `dynamicpb` should be behaviorally neutral and should not materially change benchmark results. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime.go` (reverted) +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_dynamicpb.go` (reverted) + +Commands run: +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- repeated benchmark run +- reverted patch +- reran tests and benchmarks + +Baseline before stage: +- `Benchmark_DataSource_Load`: `1960 ns/op`, `1524 B/op`, `28 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `135182 ns/op`, `80033 B/op`, `1464 allocs/op` + +Result after stage: +- First run after introducing the boundary: + - `Benchmark_DataSource_Load`: `2069 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `142519 ns/op`, `80410 B/op`, `1465 allocs/op` +- Repeat run: + - `Benchmark_DataSource_Load`: `2090 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `172699 ns/op`, `80321 B/op`, `1465 allocs/op` +- After revert: + - `Benchmark_DataSource_Load`: `1998 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `138150 ns/op`, `80138 B/op`, `1465 allocs/op` + +What worked: +- The abstraction itself was straightforward to wire in. + +What did not work: +- The experiment was not behaviorally neutral in benchmark runs. +- Because this stage is supposed to be an enabler rather than a win, carrying even a possible regression is not justified yet. +- The runtime boundary likely needs to arrive together with a real fast path so the abstraction cost is absorbed by a larger architectural gain. 
+ +Decision: +- revert +- revisit + +## Stage 8: Direct Response Application Experiment + +Goal: +Bypass per-call intermediate `astjson` subtree materialization and merge standard and resolver responses directly into the final response tree. + +Hypothesis: +If the datasource stops building a temporary JSON subtree for each non-entity RPC result, the hot path should reduce allocation pressure and improve both benchmarks, especially the field-resolver case. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/json_builder_direct_test.go` (reverted) + +Commands run: +- added red tests for direct root application and direct path application +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestJSONBuilder_ApplyMessageMatchesMarshalResponseJSON|TestJSONBuilder_ApplyMessageWithPathMatchesMarshalAndMerge'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- repeated benchmark run +- reverted hot-path integration +- reran tests and benchmarks + +Baseline before stage: +- `Benchmark_DataSource_Load`: `1998 ns/op`, `1524 B/op`, `28 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `138150 ns/op`, `80138 B/op`, `1465 allocs/op` + +Result after stage: +- With direct application integrated: + - first run: + - `Benchmark_DataSource_Load`: `2014 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `151989 ns/op`, `80279 B/op`, `1466 allocs/op` + - repeat run: + - `Benchmark_DataSource_Load`: `1990 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `142955 ns/op`, `80114 B/op`, `1466 allocs/op` +- After reverting the hot-path integration: + - repeat run: + - 
`Benchmark_DataSource_Load`: `1982 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `141360 ns/op`, `80338 B/op`, `1465 allocs/op` + +What worked: +- The direct application logic was implementable and behaviorally correct under dedicated red/green tests. +- The root benchmark stayed roughly flat, so the idea is not obviously invalid. + +What did not work: +- The field-resolver benchmark did not improve and consistently lost enough ground to reject the stage. +- Allocation count did not improve at all in the hot path, which means the avoided subtree materialization was not the dominant remaining allocator. +- This path likely needs to be paired with a broader protobuf/runtime change before it pays off. + +Decision: +- revert +- revisit + +## Stage 6: Generated Protobuf Allocation Fast Path + +Goal: +Use concrete generated protobuf message types in the hot path whenever the compiled schema's message full name exists in the linked Go binary, while preserving `dynamicpb` fallback for unknown schemas. + +Hypothesis: +If request and response roots allocate generated messages instead of `dynamicpb` when available, gRPC marshal/unmarshal and message allocation should get dramatically cheaper without requiring a full request-builder rewrite. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/compiler_test.go` + +Commands run: +- added red tests for generated-type selection and dynamic fallback +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestRPCCompiler_NewEmptyMessage_UsesGeneratedTypeWhenAvailable|TestRPCCompiler_NewEmptyMessage_FallsBackToDynamicPBWhenGeneratedTypeUnavailable|TestBuildProtoMessage'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -sample_index=alloc_space -top ...` + +Baseline before stage: +- Current kept state before this stage: + - `Benchmark_DataSource_Load`: about `1982-1998 ns/op`, `1524 B/op`, `28 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `138150-141360 ns/op`, `80138-80338 B/op`, `1465 allocs/op` + +Result after stage: +- Repeated sample over 3 runs: + - `Benchmark_DataSource_Load`: `2045-2140 ns/op`, `1203-1204 B/op`, `22 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `100316-108678 ns/op`, `50261-50444 B/op`, `1004-1005 allocs/op` + +What worked: +- The compiler now allocates generated message types through `protoregistry.GlobalTypes` when the full protobuf message name is linked into the binary. +- Unknown/runtime-only schemas still fall back to `dynamicpb`. 
+- The field-args benchmark saw the first truly major runtime drop: + - allocs/op fell from about `1465` to about `1004-1005` + - bytes/op fell from about `80 KB` to about `50 KB` + - latency stabilized around `100-109 us/op`, materially below the prior `138-141 us/op` +- Alloc-space profiling no longer shows `dynamicpb.NewMessage` or `dynamicpb.Message.Set` as dominant field-args hotspots. + +What did not work: +- The simpler load benchmark did not improve on wall-clock time; it stayed roughly flat to slightly worse, though memory improved substantially. +- This is not yet a `vtprotobuf` path and not yet a compiled-dynamic runtime; it only captures schemas that already have linked generated Go types. + +Decision: +- keep + +## Stage 7: Compiled Runtime-Type Cache + +Goal: +Cache the runtime message type once per schema message so both generated and fallback-dynamic allocations stop redoing type resolution on every allocation. + +Hypothesis: +If runtime type selection is compiled into the schema model up front, the datasource should preserve the generated fast path and slightly reduce remaining allocation-path overhead; it also establishes the correct architecture for runtime-only schemas by using `dynamicpb.NewMessageType(...)` instead of raw descriptor allocation decisions at each call site. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/compiler_test.go` + +Commands run: +- added red tests for generated and fallback runtime-type assignment +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestRPCCompiler_AssignsGeneratedRuntimeTypeWhenAvailable|TestRPCCompiler_AssignsCompiledDynamicRuntimeTypeWhenGeneratedTypeUnavailable|TestRPCCompiler_NewEmptyMessage_UsesGeneratedTypeWhenAvailable|TestRPCCompiler_NewEmptyMessage_FallsBackToDynamicPBWhenGeneratedTypeUnavailable'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- After Stage 6: + - `Benchmark_DataSource_Load`: about `2045-2140 ns/op`, `1203-1204 B/op`, `22 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `100316-108678 ns/op`, `50261-50444 B/op`, `1004-1005 allocs/op` + +Result after stage: +- Repeated sample over 3 runs: + - `Benchmark_DataSource_Load`: `2053-2112 ns/op`, `1203-1204 B/op`, `22 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `96708-101801 ns/op`, `50221-50301 B/op`, `1004-1005 allocs/op` + +What worked: +- Runtime type choice is now cached directly on each compiled message as `RuntimeType`. +- Fallback dynamic allocation now uses compiled `dynamicpb.NewMessageType(...)` instead of re-deciding from the raw descriptor every time. +- The expensive benchmark improved a bit further and did so consistently across repeated runs. +- This stage strengthens the runtime-loaded-schema architecture even where the current benchmark suite does not directly isolate that path. + +What did not work: +- This is an incremental stage, not another large jump. 
+- The current end-to-end benchmark suite still primarily exercises the linked-generated schema path, so the runtime-only-schema benefit is validated by tests and architecture more than by a dedicated external benchmark. + +Decision: +- keep + +## Stage 9: Kernel-Owned Sharded Memory + +Goal: +Replace request-byte-keyed arena acquisition with kernel-owned request state, shard-local arena pools, and fixed scratch buffers sized to the compiled execution program. + +Hypothesis: +If the kernel owns request memory instead of rebuilding slices and hashing full request bytes on every `Load`, the hot path should reduce allocation count again and improve arena reuse characteristics under varied real traffic. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` + +Commands run: +- added red tests for stable logical slot keys and preallocated request scratch +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^(TestKernelMemoryUsesStableLogicalSlotKeys|TestKernelMemoryAcquireRequestStatePreallocatesScratch)$'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^Test_DataSource_Load$'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- After Stage 7: + - `Benchmark_DataSource_Load`: about `2053-2112 ns/op`, `1203-1204 B/op`, `22 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `96708-101801 ns/op`, `50221-50301 B/op`, `1004-1005 allocs/op` + +Result after stage: +- Repeated sample over 3 runs: + - `Benchmark_DataSource_Load`: `1873-1961 ns/op`, `1182-1186 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `97023-101347 ns/op`, `55023-55277 B/op`, 
`997-998 allocs/op` + +What worked: +- The kernel now owns request-local scratch state instead of allocating `serviceCalls`, stage result slices, and pool-item tracking on every request. +- Arena keys are now stable logical slot identifiers derived from the compiled execution program rather than the full input payload, which is the right memory-ownership model for real traffic with variable inputs. +- The simple load benchmark improved again in both latency and allocations. +- The field-args benchmark cut another `6-8 allocs/op`, bringing the end state below `1000` allocs/op in repeated runs. + +What did not work: +- Bytes/op went up for the field-args benchmark even though allocation count dropped, which implies the remaining dominant transport/protobuf allocations are chunkier than the request-state allocations we removed. +- This stage does not address the reflective request construction and protobuf decode work that still dominate the expensive benchmark. + +Decision: +- keep + +## Stage 10: End-State Reprofile And Comparison + +Goal: +Reprofile the kept end state, compare it to the original baseline, and identify the next architectural ceiling. + +Hypothesis: +The retained stages should shift the dominant costs away from datasource scheduling/allocation scaffolding and toward the remaining protobuf/gRPC and JSON assembly work. 
+ +Files touched: +- `IMPROVEMENTS.md` + +Commands run: +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load$' -benchmem -cpuprofile /tmp/grpc-ds-load.cpu.out -memprofile /tmp/grpc-ds-load.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-load-args.cpu.out -memprofile /tmp/grpc-ds-load-args.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -top /tmp/grpc-ds-load.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-load.mem.out` +- `go tool pprof -top /tmp/grpc-ds-load-args.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-load-args.mem.out` + +Baseline before stage: +- Original campaign baseline: + - `Benchmark_DataSource_Load`: `2319 ns/op`, `1852 B/op`, `30 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `154109 ns/op`, `84956 B/op`, `1488 allocs/op` + +Result after stage: +- Current end state: + - `Benchmark_DataSource_Load`: about `1873-1961 ns/op`, `1182-1186 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `97023-101347 ns/op`, `55023-55277 B/op`, `997-998 allocs/op` + +What worked: +- The retained stages materially changed the runtime shape: + - `Load` no longer rebuilds graph scheduling state. + - request state and arena ownership are now compiled and kernel-local. + - generated protobuf message allocation is the default fast path when linked types are available. +- End-state alloc-space profiles confirm the old scheduling allocators are gone from the hot path. +- The remaining package-side alloc hotspots in the simple load case are now concentrated in `CompileCompiledFetches`, `CompileCompiledNode`, `buildProtoMessage`, `newEmptyMessage`, `resolveNestedMessage`, and JSON escaping. 
+- In the field-args benchmark, the dominant ceiling is now clearly the transport/protobuf path plus remaining reflective datasource work: + - gRPC stream creation and transport buffers + - protobuf unmarshal + - `buildProtoMessageWithContext` + - `resolveContextData` / `resolveListDataForPath` + - `marshalResponseJSON` + +What did not work: +- The direct response writer experiment did not pay off in isolation and remains out of the final state. +- The datasource is still far from zero-GC or zero-reflection because request construction for contextual resolver calls and response JSON assembly remain generic. +- The field-args profile makes it clear that the next major gain will not come from another small memory tweak; it requires removing more of the reflective protobuf and response-building pipeline. + +Decision: +- keep + +## Stage 11: Compiled Resolver-Context Extraction Experiment + +Goal: +Compile resolver-context extraction into field-number-based programs and direct setters so `buildProtoMessageWithContext` stops reinterpreting `ResolvePath` and field names on every request. + +Hypothesis: +If the datasource precompiles the resolver-context path and target bindings, the field-args benchmark should reduce package-side CPU and allocations by deleting runtime path interpretation work from `resolveContextData` and context-row assembly. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` (reverted) +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` (reverted) +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` (reverted) + +Commands run: +- added red test for precompiled resolve-plan presence and usage +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^TestKernelCompilesResolveRequestPlanAndUsesIt$'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- reverted patch +- reran package tests and repeated benchmark suite + +Baseline before stage: +- Current kept state before the experiment: + - `Benchmark_DataSource_Load`: about `1873-1961 ns/op`, `1182-1186 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `97023-101347 ns/op`, `55023-55277 B/op`, `997-998 allocs/op` + +Result after stage: +- With the compiled resolver-context extraction integrated: + - `Benchmark_DataSource_Load`: `1915-1994 ns/op`, `1199-1202 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `114566-159581 ns/op`, `56249-57218 B/op`, `976 allocs/op` +- After reverting: + - `Benchmark_DataSource_Load`: `1872-1902 ns/op`, `1182-1183 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `94433-108731 ns/op`, `54698-56471 B/op`, `995-997 allocs/op` + +What worked: +- The compiler could precompute resolver-context path programs and apply them correctly in tests. +- Allocation count in the heavy benchmark dropped further, from roughly `997-998` to `976 allocs/op`. + +What did not work: +- CPU regressed too much in the heavy benchmark, with repeated runs landing well above the kept baseline. +- Bytes/op also increased, so the experiment did not meet the “more memory efficient and faster” bar. 
+- The compiled path evaluator still walked protobuf values generically enough that the extra abstraction cost outweighed the saved name lookups. + +Decision: +- revert +- revisit + +## Stage 12: Shared-Context Fast Path Experiment + +Goal: +Compile a shared-context fast path for resolver requests that traverse the parent repeated list once and write context rows directly, instead of resolving each context field path independently. + +Hypothesis: +If the datasource compiles the shared parent-list prefix of resolver context paths, the field-args benchmark should improve over Stage 11 by deleting duplicate traversal work instead of merely changing field lookup mechanics. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` (reverted) +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` (reverted) +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` (reverted) + +Commands run: +- added red test for shared-context plan compilation and request building +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^TestKernelCompilesSharedContextPlanAndUsesIt$'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- reverted patch +- reran package tests and repeated benchmark suite + +Baseline before stage: +- Stable kept reference before the experiment: + - `Benchmark_DataSource_Load`: about `1873-1961 ns/op`, `1182-1186 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `97023-101347 ns/op`, `55023-55277 B/op`, `997-998 allocs/op` + +Result after stage: +- With the shared-context fast path integrated: + - `Benchmark_DataSource_Load`: `2326-3035 ns/op`, `1201-1215 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `131474-159120 ns/op`, `55121-58122 B/op`, `942 allocs/op` +- After reverting: + - 
immediate reruns were noisy and remained above the prior stable kept reference: + - `Benchmark_DataSource_Load`: `2004-2227 ns/op` on one run set, `2402-2556 ns/op` on another, with one `4727 ns/op` outlier + - `Benchmark_DataSource_Load_WithFieldArguments`: `132816-170761 ns/op`, `56635-58895 B/op`, `996-997 allocs/op` + +What worked: +- The shared-context plan compiled and produced correct batched resolver requests in tests. +- Allocation count in the heavy benchmark dropped materially again, from about `997-998` to `942 allocs/op`. + +What did not work: +- CPU regressed even more than the previous Stage 11 experiment. +- Bytes/op stayed worse than the stable kept reference. +- The direct shared-list traversal still paid too much generic protobuf overhead to be a net runtime win. +- Post-revert reruns were noisier than the earlier stable reference, so the kept reference numbers remain the last reliable comparison point for the campaign summary. + +Decision: +- revert +- revisit + +## Stage 13: Generated Direct Resolver-Context Builder + +Goal: +Bypass `protoreflect` entirely for resolver-context request building on linked generated schemas by compiling the shared resolver-context path against generated Go struct layouts and copying fields directly into the generated request structs. + +Hypothesis: +If the datasource compiles the resolver-context builder against generated message layouts, the field-args benchmark should finally improve on CPU as well as memory because the hot path deletes both repeated path interpretation and the generic protobuf field-set path for batched resolver contexts. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` + +Commands run: +- added red test for generated context-plan compilation and request building +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^TestKernelCompilesGeneratedContextPlanAndUsesIt$'` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- Stable kept reference before this stage: + - `Benchmark_DataSource_Load`: about `1873-1961 ns/op`, `1182-1186 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `97023-101347 ns/op`, `55023-55277 B/op`, `997-998 allocs/op` + +Result after stage: +- Repeated sample over 3 runs: + - `Benchmark_DataSource_Load`: `1884-1944 ns/op`, `1198-1199 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `91316-109863 ns/op`, `52187-53744 B/op`, `903-904 allocs/op` + +What worked: +- The kernel now compiles a generated-context plan for supported resolve fetches. +- The resolve hot path for those fetches now builds context rows by directly walking generated Go structs and filling generated request structs, instead of going through `protoreflect` for each copied field. +- This is the first resolver-context experiment that improved CPU and memory at the same time. +- The heavy benchmark improved materially again: + - allocs/op dropped from about `997-998` to `903-904` + - bytes/op dropped from about `55 KB` to `52-54 KB` + - latency improved from about `97-101 us/op` to about `91-110 us/op`, with the best run landing at `91.3 us/op` + +What did not work: +- The simple load benchmark stayed essentially flat; this stage is specifically a resolver-context win. 
+- The fast path only applies when both the parent response and resolver request/context messages are linked generated types and the resolve paths fit the compiled shared-prefix shape. +- Fallback dynamic schemas still use the generic path. + +Decision: +- keep + +## Stage 14: Generated Response Writer Fast Path + +Goal: +Delete the reflective response walk for supported generated response messages by compiling a response plan once and writing JSON from generated Go structs directly. + +Hypothesis: +If the datasource can compile the response tree for generated schemas, the heavy benchmark should drop again because the response path will stop doing descriptor lookups and generic `protoreflect` traversal on every returned message. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` + +Commands run: +- added red test for generated response-plan compilation and response writing +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^TestKernelCompilesGeneratedResponsePlanAndUsesIt$' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-stage14-args.cpu.out -memprofile /tmp/grpc-ds-stage14-args.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -top /tmp/grpc-ds-stage14-args.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-stage14-args.mem.out` + +Baseline before stage: +- Stable kept reference before this stage: + 
- `Benchmark_DataSource_Load`: about `1884-1944 ns/op`, `1198-1199 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `91316-109863 ns/op`, `52187-53744 B/op`, `903-904 allocs/op` + +Result after stage: +- Repeated sample over 3 runs: + - `Benchmark_DataSource_Load`: `1869-1959 ns/op`, `1230-1234 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `86220-88353 ns/op`, `51720-52100 B/op`, `904 allocs/op` + +What worked: +- The kernel now compiles a generated response plan for supported generated response messages. +- `Load` now selects a generated response writer fast path before falling back to the existing reflective marshaller. +- The heavy benchmark improved again on CPU and bytes/op: + - latency moved from about `91-110 us/op` down to about `86-88 us/op` + - bytes/op moved from about `52-54 KB` down to about `51.7-52.1 KB` + - alloc count stayed flat at `904 allocs/op` +- Post-change profiling shows the old reflective response builder is no longer the package-side response hotspot on the generated path. + +What did not work: +- This does not yet reduce alloc count because the fast path still materializes the same `astjson` tree shape. +- The simple load benchmark stayed effectively flat and gave back a small amount of bytes/op versus Stage 13. +- The fast path intentionally rejects oneofs, fragment-driven response selection, optional scalar wrappers, list-wrapper flattening, enum mapping, and field-flattening (`JSONPath == ""`), so unsupported shapes still use the old marshaller. +- This is not the final “direct final response writer” architecture; it is a generated-subtree writer that still feeds the existing merge pipeline. + +Decision: +- keep + +## Stage 15: Generated Resolve Direct-Apply Fast Path + +Goal: +Delete the intermediate resolve response subtree on supported generated resolver outputs by applying generated results directly onto the already-built root response object. 
+ +Hypothesis: +If the datasource skips materializing `{"result":[...]}` for generated resolve outputs and writes the resolved values straight onto the target objects, the heavy benchmark should improve again because it deletes both subtree creation and `mergeWithPath` work for resolver calls. + +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` + +Commands run: +- added red test for generated resolve-plan compilation and direct application +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^TestKernelCompilesGeneratedResolveApplyPlanAndUsesIt$' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- reverted the stage +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +Baseline before stage: +- Stable kept reference before this stage: + - `Benchmark_DataSource_Load`: about `1869-1959 ns/op`, `1230-1234 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `86220-88353 ns/op`, `51720-52100 B/op`, `904 allocs/op` + +Result after stage: +- With the experiment enabled: + - `Benchmark_DataSource_Load`: `1935-2011 ns/op`, `1247-1251 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `159375-282128 ns/op`, `57751-60102 B/op`, `907-909 allocs/op` +- After revert: + - `Benchmark_DataSource_Load`: `1896-1960 ns/op`, `1231-1236 B/op`, `19 allocs/op` + - 
`Benchmark_DataSource_Load_WithFieldArguments`: `86988-89208 ns/op`, `51919-52099 B/op`, `904-905 allocs/op` + +What worked: +- The stage correctly compiled a narrow generated resolve-apply plan for benchmark-shaped resolver outputs. +- Functionality was correct in the focused test and the full package test suite. +- The experiment confirmed a useful architectural fact: deleting the intermediate resolve subtree is only valuable if it does not also serialize response construction behind the stage barrier. + +What did not work: +- The heavy benchmark regressed badly on CPU, bytes/op, and alloc count. +- The direct-apply path moved generated resolve response materialization out of the concurrent goroutine path and into the sequential merge phase, which erased the intended benefit and made the hot path slower overall. +- In other words, this version removed object creation but also removed too much parallelism. + +Decision: +- revert +- revisit + +## Stage 16: Concurrent Generated Resolve-Value Slices + +Goal: +Keep resolver response construction on the concurrent goroutine path while deleting the intermediate `{"result":[...]}` wrapper object for supported generated resolver outputs. + +Hypothesis: +If the datasource materializes only the final resolved value slice in parallel and then does a minimal sequential attach step, the heavy benchmark should improve over Stage 14 because it removes wrapper-object creation without repeating the Stage 15 mistake of serializing response construction. 
+ +Files touched: +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` + +Commands run: +- added red test for generated resolve-values-plan compilation and value-slice attach +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run '^TestKernelCompilesGeneratedResolveValuesPlanAndUsesIt$' -count=1` +- `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=3 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource` +- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem -cpuprofile /tmp/grpc-ds-stage16-args.cpu.out -memprofile /tmp/grpc-ds-stage16-args.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource` +- `go tool pprof -top /tmp/grpc-ds-stage16-args.cpu.out` +- `go tool pprof -sample_index=alloc_space -top /tmp/grpc-ds-stage16-args.mem.out` + +Baseline before stage: +- Stable kept reference before this stage: + - `Benchmark_DataSource_Load`: about `1869-1959 ns/op`, `1230-1234 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: about `86220-88353 ns/op`, `51720-52100 B/op`, `904 allocs/op` + +Result after stage: +- Repeated sample over 3 runs: + - `Benchmark_DataSource_Load`: `1910-1975 ns/op`, `1246-1251 B/op`, `19 allocs/op` + - `Benchmark_DataSource_Load_WithFieldArguments`: `84145-87850 ns/op`, `51528-51905 B/op`, `906-907 allocs/op` + +What worked: +- The kernel now compiles a generated resolve-values plan for the supported benchmark-shaped resolver output. 
+- The concurrent goroutine path now materializes only the resolved value slice for those resolver calls instead of a full `{"result":[...]}` subtree. +- The heavy benchmark improved again on CPU and bytes/op: + - latency moved from about `86-88 us/op` down to about `84-88 us/op` + - bytes/op moved from about `51.7-52.1 KB` down to about `51.5-51.9 KB` +- Profiling confirms the new work stayed on the concurrent path: `marshalGeneratedResolveValues` shows up as a small package-side allocation site, while the Stage 15-style sequential direct-apply regression did not reappear. +- This stage validates the architectural constraint uncovered by Stage 15: subtree deletion is only a win when parallel response materialization is preserved. + +What did not work: +- Allocation count increased slightly in the heavy benchmark, from `904` to `906-907 allocs/op`. +- The simple load benchmark stayed essentially flat and gave back a small amount of bytes/op. +- The fast path still only supports a narrow resolver-output shape: repeated top-level `result` with exactly one scalar or message field per item, using generated linked schemas. + +Decision: +- keep + +## Current Status + +From the original baseline to the current state: + +- `Benchmark_DataSource_Load`: `2319 ns/op` -> about `1910-1975 ns/op`, `1852 B/op` -> `1246-1251 B/op`, `30 allocs/op` -> `19 allocs/op` +- `Benchmark_DataSource_Load_WithFieldArguments`: `154109 ns/op` -> about `84145-87850 ns/op`, `84956 B/op` -> `51528-51905 B/op`, `1488 allocs/op` -> `906-907 allocs/op` + +Interpretation: + +- The biggest structural gain came from compiling scheduling and request ownership out of the hot path. +- The next gains came from reducing schema-lookup churn and map-heavy context construction. +- The largest single runtime drop came from replacing `dynamicpb` allocation with generated message allocation when linked types are available. 
+- Kernel-owned sharded memory closed out another chunk of request-local allocation overhead and fixed the wrong pool-key model. +- The compiled runtime-type cache cleaned up the remaining allocation path and formalized the right architecture for runtime-only schemas. +- A standalone runtime abstraction is not worth keeping until it lands with a real faster backend. +- Direct response application is not worth keeping by itself; it needs a larger surrounding architecture change to matter. +- A field-number-based resolver-path interpreter is not enough by itself; it saves allocations but still loses on CPU. +- A shared-list context fast path is also not enough by itself; it removes row-building allocations but still loses on CPU because the underlying protobuf machinery remains too generic. +- The first real post-Stage-10 win came from deleting generic protobuf work entirely on the generated resolver path rather than compiling another generic interpreter. +- The resolve-side wrapper subtree can be reduced profitably, but only if the work stays on the concurrent goroutine path. +- The next real ceiling is now even narrower: the remaining generic fallback path and the still-generic final response assembly for unsupported shapes. + +## Stage 0: Baseline And Profiles + +Goal: +Capture a clean baseline before any code changes in this campaign. + +Hypothesis: +The current baseline should confirm that `Benchmark_DataSource_Load_WithFieldArguments` is the dominant cost center and that dependency graph work is measurable but secondary. 
+
+Files touched:
+- `IMPROVEMENTS.md`
+
+Commands run:
+- `cd v2 && go test -count=1 -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments|BenchmarkBuildDependencyGraph|BenchmarkCompareKeyFields)$' -benchmem ./pkg/engine/datasource/grpc_datasource`
+- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load$' -cpuprofile /tmp/grpc-ds-stage0-load.cpu.out -memprofile /tmp/grpc-ds-stage0-load.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource`
+- `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -cpuprofile /tmp/grpc-ds-stage0-args.cpu.out -memprofile /tmp/grpc-ds-stage0-args.mem.out -memprofilerate=1 -cpu=1 ./pkg/engine/datasource/grpc_datasource`
+- `go tool pprof -top ...`
+- `go tool pprof -sample_index=alloc_space -top ...`
+
+Baseline before stage:
+- not applicable
+
+Result after stage:
+- `Benchmark_DataSource_Load_WithFieldArguments` is still the dominant runtime target at
+  `154109 ns/op`, `84956 B/op`, `1488 allocs/op`.
+- `BenchmarkBuildDependencyGraph` is small in absolute time but still pure structural overhead
+  at `343.1 ns/op`, `432 B/op`, `7 allocs/op`.
+- `Benchmark_DataSource_Load` still shows package-side alloc pressure in graph construction,
+  fetch compilation, and field lookup before transport cost dominates.
+
+What worked:
+- The current benchmark set is sufficient to compare structural changes stage by stage.
+- Profiling still clearly separates interpreter overhead from transport/protobuf overhead.
+
+What did not work:
+- CPU profiles for the field-args benchmark are noisy because profiling overhead is large relative
+  to the benchmark duration; alloc-space data is more actionable for early stages.
+ +Decision: +- keep diff --git a/docs/superpowers/plans/2026-04-17-grpc-datasource-ultra-performance.md b/docs/superpowers/plans/2026-04-17-grpc-datasource-ultra-performance.md new file mode 100644 index 0000000000..29bce9997b --- /dev/null +++ b/docs/superpowers/plans/2026-04-17-grpc-datasource-ultra-performance.md @@ -0,0 +1,402 @@ +# gRPC Datasource Ultra-Performance Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the current interpreter-style gRPC datasource with a compiled Go execution engine that minimizes heap allocation, sharply reduces CPU cycles per request, and makes the hot path almost entirely precomputed. + +**Architecture:** The datasource becomes a two-phase system. A cold-path compiler turns a GraphQL operation, protobuf schema, and mapping into a specialized execution program with fixed batches, direct request builders, direct context extractors, and direct response write instructions; the hot path only binds request values, runs precompiled stages, and emits the final GraphQL response. The protobuf layer becomes pluggable so the engine can run on generated/vtprotobuf fast paths for known schemas and a compiled dynamic runtime for unknown schemas, while `dynamicpb` remains only as a compatibility fallback. + +**Tech Stack:** Go 1.25, `grpc-go`, `google.golang.org/protobuf`, `vtprotobuf`, optional `hyperpb`-style dynamic runtime, Go PGO, `pprof`, existing test/benchmark suite. 
+ +--- + +## Radical Thesis + +The current datasource is architecturally wrong for extreme performance because it behaves like a generic runtime interpreter on every request: + +- it rebuilds execution state in `Load` +- it re-discovers request/response structure at runtime +- it walks protobuf and JSON trees generically +- it materializes intermediate structures only to merge them later + +The highest-impact Go-only redesign is: + +1. Compile each datasource instance into an operation-specific execution kernel. +2. Replace reflection-heavy protobuf handling with a pluggable high-performance runtime. +3. Eliminate intermediate JSON subtree assembly and write directly into the final response shape. + +Everything else is secondary. + +--- + +## File Map + +### Existing files to modify + +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` +- `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- `v2/pkg/engine/datasource/grpc_datasource/fetch.go` +- `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` +- `v2/pkg/engine/datasource/grpc_datasource/execution_plan.go` +- `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/fetch_test.go` + +### New files to create + +- `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_program.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_program_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_compile.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_compile_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_request_builder.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_request_builder_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_context_extractor.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_context_extractor_test.go` +- 
`v2/pkg/engine/datasource/grpc_datasource/kernel_response_writer.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_response_writer_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_memory.go` +- `v2/pkg/engine/datasource/grpc_datasource/kernel_memory_test.go` +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime.go` +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_dynamicpb.go` +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_vtproto.go` +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_compiled_dynamic.go` +- `v2/pkg/engine/datasource/grpc_datasource/perf_test.go` + +### Optional later files + +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_codegen.go` +- `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_hyperpb.go` +- `v2/pkg/engine/datasource/grpc_datasource/internal/generated/` + +--- + +## Performance Targets + +- Turn `Load` into an executor over a precompiled kernel, not a runtime planner/compiler. +- Remove per-request dependency graph creation and sorting entirely. +- Remove generic name-based field lookup from the hot path entirely. +- Remove `[]map[string]protoref.Value` and similar generic intermediate representations entirely. +- Replace intermediate response subtree creation with direct final-response writes. +- Reduce `Benchmark_DataSource_Load_WithFieldArguments` allocs/op by an order of magnitude. +- Reduce package-side CPU in request compilation, context extraction, and merge work by an order of magnitude. +- Leave gRPC transport and protobuf decode/encode as the dominant remaining cost. + +--- + +## External Architecture Patterns Applied Here + +- Apollo Router pushes planning into a native cold path and caches the result. We should do the same inside the datasource, per operation. +- Envoy uses shared-nothing workers and thread-local state. We should use sharded/request-local scratch and bounded execution, not request-byte-keyed global-ish reuse. +- NGINX keeps copies minimal. 
We should stop building transient JSON/protobuf object graphs that are immediately merged or discarded. +- gRPC recommends channel reuse and only using more exotic transport patterns when the transport itself becomes the bottleneck. Our current bottleneck is above transport. +- `vtprotobuf` proves that unrolled generated code is dramatically better than generic reflection in Go. +- `hyperpb` proves that even dynamic protobuf can be treated like a compiled runtime instead of a generic reflective interpreter. +- Go’s PGO and recent allocation work reinforce the same conclusion: fewer heap objects and more precompiled structure win. + +--- + +## Chunk 1: Build The Kernel Boundary + +### Task 1: Introduce The Kernel Abstraction + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_test.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` + +- [ ] Define a `kernel` object that owns the fully compiled execution program for one datasource instance. +- [ ] Make `NewDataSource` compile the kernel once and store it. +- [ ] Make `Load` delegate to `kernel.Execute(...)`. +- [ ] Preserve existing public behavior and tests. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + +### Task 2: Represent The Operation As A Compiled Program + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_program.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_program_test.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_compile.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_compile_test.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/fetch.go` + +- [ ] Compile the current `RPCExecutionPlan` into fixed execution stages: + - stage order + - batch members + - dependency links + - precomputed method names + - precomputed response merge routing +- [ ] Remove per-request `NewDependencyGraph(...)`. 
+- [ ] Remove per-request `TopologicalSortResolve(...)`. +- [ ] Keep a minimal mutable request-state array for stage outputs only. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + - `cd v2 && go test -run '^$' -bench BenchmarkBuildDependencyGraph -benchmem ./pkg/engine/datasource/grpc_datasource` + +--- + +## Chunk 2: Compile Request Construction + +### Task 3: Replace Runtime Message Building With Compiled Request Builders + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_request_builder.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_request_builder_test.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/compiler.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel_compile.go` + +- [ ] Compile each RPC request shape into a specialized builder program. +- [ ] Pre-resolve: + - protobuf field descriptors + - field numbers + - nullability checks + - oneof routing + - repeated/list behavior + - argument-slot reads +- [ ] The builder should operate on fixed slots and direct descriptor handles, not names. +- [ ] Split builders by call kind: + - standard/entity + - resolve + - required +- [ ] Keep `dynamicpb` compatibility while removing generic per-request interpretation. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + - `cd v2 && go test -run '^$' -bench Benchmark_DataSource_Load -benchmem ./pkg/engine/datasource/grpc_datasource` + +### Task 4: Compile Resolver Context Extraction + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_context_extractor.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_context_extractor_test.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/compiler.go` + +- [ ] Replace `resolveContextData`, `resolveContextDataForPath`, `resolveListDataForPath`, and `resolveDataForPath` as the main runtime path. 
+- [ ] Compile resolver context extraction into direct extraction programs that: + - traverse known parent output shapes + - write directly into the next request’s repeated `context` field + - preserve response order alignment +- [ ] Eliminate `[]map[string]protoref.Value`. +- [ ] Eliminate per-field map growth in resolver batching. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + - `cd v2 && go test -count=1 -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +--- + +## Chunk 3: Replace The Protobuf Runtime + +### Task 5: Add A Pluggable Proto Runtime Layer + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/proto_runtime.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_dynamicpb.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_vtproto.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_compiled_dynamic.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel_request_builder.go` + +- [ ] Define a runtime interface for: + - request allocation/reset + - response allocation/reset + - unmarshal/marshal + - descriptor-backed field access + - optional pooled message reuse +- [ ] Keep `dynamicpb` only as fallback compatibility mode. +- [ ] Make the kernel depend on the runtime interface only. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + +### Task 6: Add The Generated Fast Path + +**Files:** +- Modify: `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_vtproto.go` +- Optional create: `v2/pkg/engine/datasource/grpc_datasource/internal/generated/...` + +- [ ] For known schemas, support generated message types with `vtprotobuf` marshal/unmarshal and pool helpers. +- [ ] Use the generated fast path wherever the schema is known at build time. 
+- [ ] Allow mixed mode so not all message types need generated support on day one. +- [ ] Benchmark the generated fast path against `dynamicpb`. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + - `cd v2 && go test -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +### Task 7: Add The Compiled Dynamic Fast Path + +**Files:** +- Modify: `v2/pkg/engine/datasource/grpc_datasource/proto_runtime_compiled_dynamic.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel_compile.go` + +- [ ] Introduce a compiled dynamic protobuf backend for runtime-loaded schemas. +- [ ] The backend must: + - compile message types once from descriptors + - reuse parse/runtime state aggressively + - avoid generic reflection-heavy field lookup during decode +- [ ] Model this after `hyperpb`’s compiled runtime approach, but keep the integration Go-native and repo-owned. +- [ ] Treat this as the long-term replacement for `dynamicpb` in the hot path. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + - `cd v2 && go test -run '^$' -bench '^Benchmark_DataSource_Load_WithFieldArguments$' -benchmem ./pkg/engine/datasource/grpc_datasource` + +--- + +## Chunk 4: Replace Response Tree Construction + +### Task 8: Build A Direct Final-Response Writer + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_response_writer.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_response_writer_test.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel.go` + +- [ ] Stop creating one intermediate `astjson` subtree per service call as the primary runtime path. 
+- [ ] Compile response write instructions for each RPC result: + - root merge writes + - resolver path writes + - entity ordering writes + - nested list writes + - optional/null handling +- [ ] Write directly into the final response object or final response buffer using the precomputed program. +- [ ] Keep the old builder only as a temporary parity oracle during migration. +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + - `cd v2 && go test -run '^$' -bench Benchmark_DataSource_Load -benchmem ./pkg/engine/datasource/grpc_datasource` + +### Task 9: Remove Generic Merge Traversal + +**Files:** +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel_response_writer.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/json_builder.go` + +- [ ] Replace `flattenObject`, `flattenList`, and generic resolver merge traversal with precompiled parent-child alignment programs. +- [ ] Preserve exact alias/null/list semantics. +- [ ] Add tests for: + - sibling resolvers + - nested resolvers + - null parents + - federation/entity ordering +- [ ] Run: + - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + +--- + +## Chunk 5: Memory Model And Execution Model + +### Task 10: Introduce Kernel-Owned Memory Arenas And Sharded Scratch + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_memory.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/kernel_memory_test.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource.go` + +- [ ] Replace request-byte-keyed pool reuse with kernel-owned sharded scratch state. +- [ ] Pool: + - request slot arrays + - temporary decode/build buffers + - response writer state + - optional proto objects where runtime allows safe reset/reuse +- [ ] Keep pools bounded and shard-local. +- [ ] Optimize for stable high-throughput workloads, not byte-identical request reuse. 
+- [ ] Run:
+  - `cd v2 && go test ./pkg/engine/datasource/grpc_datasource`
+  - `cd v2 && go test -run '^$' -bench Benchmark_DataSource_Load -benchmem ./pkg/engine/datasource/grpc_datasource`
+
+### Task 11: Make Execution Bounded And Worker-Like
+
+**Files:**
+- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel.go`
+- Modify: `v2/pkg/engine/datasource/grpc_datasource/kernel_memory.go`
+
+- [ ] Replace unconditional per-stage goroutine fan-out with a bounded execution model.
+- [ ] Inline tiny batches.
+- [ ] Use worker-local scratch for larger batches.
+- [ ] Add backpressure and clear concurrency limits so throughput does not turn into memory blow-up.
+- [ ] Run concurrency benchmarks at multiple parallelism levels.
+
+---
+
+## Chunk 6: Validation And Build Optimization
+
+### Task 12: Rebuild The Benchmark Suite Around The Kernel
+
+**Files:**
+- Create: `v2/pkg/engine/datasource/grpc_datasource/perf_test.go`
+- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_test.go`
+
+- [ ] Add isolated benchmarks for:
+  - kernel compile
+  - request build
+  - context extraction
+  - proto runtime decode
+  - direct response writing
+  - end-to-end load
+- [ ] Add benchmarks for:
+  - single root fetch
+  - one resolver stage
+  - two sibling resolver stages
+  - nested resolver chain
+- [ ] Capture fresh CPU and allocation profiles after every major chunk.
+
+### Task 13: Apply Go PGO To The New Hot Path
+
+**Files:**
+- Create when stable: `v2/default.pgo`
+
+- [ ] Collect representative CPU profiles from the new kernel-based runtime.
+- [ ] Build with Go PGO.
+- [ ] Keep PGO only if it improves the final kernel, not intermediate experiments.
+- [ ] Run:
+  - `cd v2 && go test -run '^$' -bench '^(Benchmark_DataSource_Load|Benchmark_DataSource_Load_WithFieldArguments)$' -benchmem ./pkg/engine/datasource/grpc_datasource`
+
+---
+
+## Non-Goals For The First Wave
+
+- Do not start with small helper cleanups as the main project. 
+- Do not spend the first phase hand-tuning existing `dynamicpb` interpreter code. +- Do not treat `sync.Pool` or tiny hash-map fixes as the strategy. +- Do not introduce Rust, C++, or a sidecar. This remains Go-only. + +Those may still happen as cleanup work, but they are explicitly not the center of the plan. + +--- + +## Expected End State + +- `DataSource.Load` is a thin wrapper around a compiled kernel executor. +- The operation DAG, batches, service names, request shapes, response write paths, and dependency routing are all precompiled. +- Request construction is specialized and slot-based. +- Resolver context propagation is direct and allocation-light. +- Response emission writes directly into the final result shape. +- The protobuf runtime is no longer synonymous with `dynamicpb`. +- The hot path is dominated by actual RPC I/O and decode/encode work, not internal orchestration overhead. + +--- + +## Validation Checklist + +- [ ] End-to-end behavior matches existing tests. +- [ ] Per-request graph/sort work is gone from profiles. +- [ ] Generic field-name lookup is gone from profiles. +- [ ] `resolveContextData`-style map building is gone from the hot path. +- [ ] Intermediate response subtree materialization is no longer dominant. +- [ ] `Benchmark_DataSource_Load_WithFieldArguments` shows order-of-magnitude improvement in allocs/op and material CPU reduction. +- [ ] The generated backend beats the fallback backend on representative workloads. + +--- + +## Execution Guidance + +Implement in this order: + +1. Kernel boundary and compiled program +2. Compiled request builders +3. Compiled context extraction +4. Direct response writer +5. Proto runtime replacement +6. Memory/execution model tightening +7. PGO + +If a task does not directly advance one of those seven items, it is probably not on the critical path. 
diff --git a/docs/superpowers/plans/2026-04-18-grpc-datasource-v2.md b/docs/superpowers/plans/2026-04-18-grpc-datasource-v2.md new file mode 100644 index 0000000000..4572b97e8d --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-grpc-datasource-v2.md @@ -0,0 +1,158 @@ +# gRPC Datasource V2 Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Introduce `DataSourceV2` as a second gRPC datasource with a new IR-driven execution architecture while preserving the current datasource as the baseline for correctness and comparison. + +**Architecture:** Revert the current datasource package to baseline, preserve the campaign findings in docs, then add `DataSourceV2` in the same package with a compiled IR, schema runtime tables for generated and dynamic schemas, and compatibility fallback to the existing datasource for unsupported fetches. Establish explicit v1 vs v2 tests and benchmarks immediately. + +**Tech Stack:** Go, `grpc-go`, `google.golang.org/protobuf`, existing `RPCExecutionPlan`, existing planner/compiler package internals, `pprof`, existing benchmark suite. + +--- + +## Chunk 1: Reset And Preserve + +### Task 1: Preserve design and campaign knowledge + +**Files:** +- Create: `docs/superpowers/specs/2026-04-18-grpc-datasource-v2-design.md` +- Modify: `IMPROVEMENTS.md` + +- [ ] Write the design doc describing why v2 exists, the new IR/runtime, dynamic schema support, and fallback strategy. +- [ ] Keep the improvement ledger intact as the historical record for v1 experiments. 
+
+### Task 2: Revert the existing datasource package to baseline
+
+**Files:**
+- Restore tracked files under `v2/pkg/engine/datasource/grpc_datasource/`
+- Delete experimental files introduced for the v1 campaign under that directory
+
+- [ ] Revert tracked modifications in `compiler.go`, `execution_plan.go`, `grpc_datasource.go`, `json_builder.go`, related tests, and any other v1 experiment files.
+- [ ] Delete untracked experimental code files such as `kernel.go` and `kernel_test.go`.
+- [ ] Run: `cd v2 && go test ./pkg/engine/datasource/grpc_datasource`
+
+## Chunk 2: Introduce V2 Skeleton
+
+### Task 3: Add the second datasource type and constructor
+
+**Files:**
+- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go`
+- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go`
+
+- [ ] Add `DataSourceV2` implementing `resolve.DataSource`.
+- [ ] Add `NewDataSourceV2(...)`.
+- [ ] Make `DataSourceV2` own:
+  - the v2 compiled program
+  - the schema runtime
+  - a v1 fallback datasource
+- [ ] Write a failing test asserting v2 can be constructed and can load a simple query.
+- [ ] Run the test to watch it fail.
+- [ ] Implement the minimal constructor and fallback-backed `Load`.
+- [ ] Run: `cd v2 && go test ./pkg/engine/datasource/grpc_datasource -run 'TestDataSourceV2'`
+
+### Task 4: Add the v2 IR and schema-runtime core types
+
+**Files:**
+- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go`
+- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go`
+
+- [ ] Define `v2Program`, `v2Stage`, `v2Fetch`, opcode enums, operand structs, and support/fallback markers.
+- [ ] Define schema runtime tables for messages, fields, and methods that work for both generated and dynamic schemas.
+- [ ] Keep the initial IR small but real; do not fake it with plain `RPCCall`.
+ +## Chunk 3: Compile The IR + +### Task 5: Lower the existing execution plan into v2 IR + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` +- Test: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +- [ ] Write a failing test that compiles a known query into a non-empty v2 program with stages and fetch records. +- [ ] Run it to confirm failure. +- [ ] Implement plan lowering: + - stage layout + - per-fetch request/response metadata + - fallback flags + - response path records +- [ ] Run the focused test again. + +### Task 6: Compile descriptor-backed schema tables from day one + +**Files:** +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go` +- Test: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +- [ ] Write a failing test that compiles runtime schema tables for a proto message without requiring generated Go structs. +- [ ] Run it to confirm failure. +- [ ] Implement descriptor lowering into stable runtime tables. +- [ ] Add generated-type handles opportunistically when available. +- [ ] Run the focused tests again. + +## Chunk 4: Build The Runtime + +### Task 7: Add a v2 execution kernel with broad fallback + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_fallback.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go` + +- [ ] Write a failing test asserting v2 routes unsupported fetches through the v1 fallback and preserves output exactly. +- [ ] Run it to confirm failure. +- [ ] Implement runtime stage execution with broad fallback as the default. +- [ ] Preserve exact output and error behavior. 
+- [ ] Run: `cd v2 && go test ./pkg/engine/datasource/grpc_datasource` + +### Task 8: Add the first native v2 fetch path + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_request.go` +- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_response.go` +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go` +- Test: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +- [ ] Pick one narrow but real fetch shape and implement native v2 execution for it. +- [ ] Prefer a standard fetch with no resolver dependencies as the first native path. +- [ ] Compile request ops and response projection ops for that fetch. +- [ ] Keep dynamic-schema support by using schema tables, not generated-only logic. +- [ ] Fallback automatically for everything else. +- [ ] Run focused tests and full package tests. + +## Chunk 5: Comparison Harness + +### Task 9: Add v1 vs v2 comparison tests + +**Files:** +- Modify: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go` + +- [ ] Add tests that execute the same query through v1 and v2 and compare JSON output byte-for-byte where stable. +- [ ] Cover: + - simple standard fetch + - benchmark-dominant field-resolver query + - at least one query that forces fallback + +### Task 10: Add comparison benchmarks + +**Files:** +- Create: `v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go` + +- [ ] Add side-by-side benchmarks: + - `Benchmark_DataSource_V1_Load` + - `Benchmark_DataSource_V2_Load` + - `Benchmark_DataSource_V1_Load_WithFieldArguments` + - `Benchmark_DataSource_V2_Load_WithFieldArguments` +- [ ] Ensure both benchmarks use the same setup and query shapes. 
+- [ ] Run: `cd v2 && go test -run '^$' -bench 'Benchmark_DataSource_(V1|V2)_' -benchmem ./pkg/engine/datasource/grpc_datasource` + +## Chunk 6: Record The New Phase + +### Task 11: Update the improvement ledger for the v2 reset + +**Files:** +- Modify: `IMPROVEMENTS.md` + +- [ ] Add a section documenting that v1 was intentionally reset to baseline and why. +- [ ] Add a section documenting the start of the v2 comparison phase. +- [ ] Record benchmark results for v1 baseline vs v2 initial engine. diff --git a/docs/superpowers/specs/2026-04-18-grpc-datasource-v2-design.md b/docs/superpowers/specs/2026-04-18-grpc-datasource-v2-design.md new file mode 100644 index 0000000000..02da3659d5 --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-grpc-datasource-v2-design.md @@ -0,0 +1,260 @@ +# gRPC Datasource V2 Design + +## Goal + +Introduce a second gRPC datasource implementation that takes a fundamentally different route from the current interpreter-style datasource: + +- preserve all current behavior +- handle generated and dynamic schemas from day one +- keep the current datasource intact as the compatibility baseline +- make direct v1 vs v2 benchmarking possible on the same operations + +The new engine must be structurally capable of ultra-high performance, even if early iterations still rely on compatibility fallback for portions of behavior. + +## Why A Second Datasource + +The current datasource has been improved materially, but the remaining ceiling is architectural: + +- request construction still depends on generic message semantics +- dynamic-schema handling still leans on protobuf generic runtime behavior +- response assembly still builds and merges `astjson` subtrees +- fallback behavior and performance experimentation are entangled in the same implementation + +Keeping the existing datasource as-is and building `DataSourceV2` alongside it gives four advantages: + +1. direct correctness comparison against the known-good path +2. 
direct benchmark comparison without archaeology +3. low-risk fallback for unsupported or not-yet-ported behavior +4. freedom to design a new runtime without contaminating the old one + +## Useful Findings From The First Optimization Campaign + +These findings should shape v2 from the start: + +1. Compiling scheduling out of the hot path helped immediately. +2. Generated-type allocation was the largest single practical win. +3. Generic reflection-style resolver/context interpreters were not enough by themselves. +4. Generated response writing helped, but not enough, because the final response pipeline stayed generic. +5. Deleting intermediate resolve structures only worked when concurrency was preserved. +6. The true remaining ceiling is not a single hotspot. It is the interpreter model itself. + +That means v2 must not be "v1 plus more fast paths". It needs its own runtime model. + +## V2 Thesis + +`DataSourceV2` is an operation-compiled engine with its own IR. + +Cold path: + +- use the existing planner to obtain a correct `RPCExecutionPlan` +- lower that plan into a compact runtime IR +- compile proto descriptors into schema tables and access programs +- precompute request builders, response writers, and fallback boundaries + +Hot path: + +- bind variables into slots +- run compiled stage programs +- build requests using IR, not recursive `RPCMessage` interpretation +- decode/access protobuf through a schema runtime, not ad hoc reflection +- write the final response through compiled response programs +- fall back to v1 for unsupported instructions while preserving exact behavior + +## Core Architectural Choice + +V2 will have a bytecode-like IR and interpreter runtime. 
+ +This is the most radical Go-only route that still preserves behavior: + +- more radical than "more generated fast paths" +- more compatible than rewriting everything into generated Go code only +- more realistic than requiring all schemas to be compile-time known + +The IR exists so both generated and dynamic schemas can share the same execution model. + +## High-Level Structure + +`DataSourceV2` will live in the same Go package as the current datasource, but as a separate type and constructor: + +- `NewDataSource` remains the current baseline engine +- `NewDataSourceV2` constructs the new engine + +Proposed file groups: + +- `grpc_datasource_v2.go` +- `grpc_datasource_v2_ir.go` +- `grpc_datasource_v2_compile.go` +- `grpc_datasource_v2_schema.go` +- `grpc_datasource_v2_runtime.go` +- `grpc_datasource_v2_request.go` +- `grpc_datasource_v2_response.go` +- `grpc_datasource_v2_fallback.go` +- `grpc_datasource_v2_test.go` +- `grpc_datasource_v2_bench_test.go` + +## Runtime Components + +### 1. Planner Bridge + +Input: + +- `RPCExecutionPlan` +- compiled proto document +- mapping + +Output: + +- `v2Program` + +The planner bridge is temporary but important. It lets v2 reuse current planning correctness while replacing execution. + +### 2. Schema Runtime + +The schema runtime compiles descriptors into stable tables for both generated and dynamic schemas. + +Key structures: + +- message table +- field table +- method table +- wire/decode metadata +- generated type handles when available +- dynamic access programs when generated types are unavailable + +The goal is to move from name-driven runtime lookups to integer-indexed runtime access. + +### 3. Request IR + +Each fetch gets a request program: + +- load variable slot +- load static literal +- load dependency field +- begin message +- set scalar field +- set enum field +- append repeated field +- begin/end oneof branch +- branch nullability + +This is not codegen into Go source. It is a compact executable program. 
+ +### 4. Response IR + +Each fetch gets a response program: + +- decode root result field +- iterate repeated result items +- project scalar/message field +- emit into response slot +- attach to root path +- merge entity payload + +The key design rule is that v2 should move toward direct final-response writes, not subtree-then-merge as the main architecture. + +### 5. Execution Kernel + +The kernel owns: + +- compiled stages +- shard-local memory +- request scratch +- output slots +- temporary value vectors + +The kernel should be oblivious to mapping semantics at runtime. It just executes IR. + +### 6. Compatibility Fallback + +Behavior preservation is non-negotiable. + +So v2 must support: + +- per-fetch fallback to v1 execution for unsupported IR +- optional whole-operation fallback when mixed-mode would be incorrect +- correctness-first routing until each feature is ported + +Fallback is not failure. It is part of the design. + +## Dynamic Schemas From Day One + +V2 must not treat dynamic schemas as second-class. + +That means: + +- compile descriptor-backed message layouts into schema tables at datasource construction time +- execute request/response programs against those schema tables +- use generated type handles opportunistically, but never require them +- keep `dynamicpb` as a compatibility implementation detail only where the new runtime has not replaced behavior yet + +Day-one support does not mean day-one peak performance for every dynamic path. It means the architecture and API surface support them natively. + +## Compatibility Contract + +`DataSourceV2` must preserve all current behavior: + +- existing mapping semantics +- existing federation behavior +- resolver behavior +- aliases +- nullable and optional handling +- oneofs +- list wrappers +- enum mappings +- entity ordering +- required-field behavior + +If any of those are not supported by a v2 fetch program, that fetch must route through fallback automatically. 
+ +## Rollout Strategy + +Phase 1: + +- revert the v1 package to baseline +- preserve all findings in docs +- introduce `DataSourceV2` +- compile IR and schema tables +- fallback to v1 broadly +- add direct v1 vs v2 tests and benchmarks + +Phase 2: + +- port standard fetch request build to IR runtime +- port standard response projection to IR runtime +- prove dynamic-schema path correctness + +Phase 3: + +- port resolve fetches +- port entity fetches +- port required-field fetches +- reduce fallback surface + +Phase 4: + +- introduce direct final-response writing +- eliminate generic subtree merge for supported shapes + +## Comparison Strategy + +We need explicit side-by-side comparisons: + +- correctness tests: v1 output equals v2 output +- behavior tests: v2 fallback triggers where expected +- benchmarks: + - `Benchmark_DataSource_Load` + - `Benchmark_DataSource_Load_WithFieldArguments` + - v1 vs v2 variants on identical operations + +## Recommended First Breakthrough + +The first real architectural milestone is not “make v2 faster than v1 everywhere”. + +It is: + +1. compile a genuine IR from day one +2. support dynamic schemas in that IR compiler +3. keep exact behavior through fallback +4. establish a stable side-by-side benchmark harness + +Once that exists, the next breakthroughs can happen inside v2 without destabilizing v1. 
diff --git a/go.work.sum b/go.work.sum index 1aecd8d220..13bc4bb0c0 100644 --- a/go.work.sum +++ b/go.work.sum @@ -1,5 +1,10 @@ +al.essio.dev/pkg/shellescape v1.6.0/go.mod h1:6sIqp7X2P6mThCQ7twERpZTuigpr6KbZWtls1U8I890= +buf.build/gen/go/bufbuild/hyperpb-examples/protocolbuffers/go v1.36.7-20250725192734-0dd56aa9cbbc.1/go.mod h1:x7jYNX5/7EPnsKHEq596krkOGzvR97/MsZw2fw3Mrq0= +buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.7-20250717185734-6c6e0d3c608e.1/go.mod h1:eva/VCrd8X7xuJw+JtwCEyrCKiRRASukFqmirnWBvFU= +buf.build/go/protovalidate v0.14.0/go.mod h1:+F/oISho9MO7gJQNYC2VWLzcO1fTPmaTA08SDYJZncA= cel.dev/expr v0.19.1 h1:NciYrtDRIR0lNCnH1LFJegdjspNx9fI59O7TWcua/W4= cel.dev/expr v0.19.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= +cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= connectrpc.com/connect v1.16.2 h1:ybd6y+ls7GOlb7Bh5C8+ghA6SvCBajHwxssO2CGFjqE= @@ -26,6 +31,7 @@ github.com/alicebob/miniredis/v2 v2.34.0 h1:mBFWMaJSNL9RwdGRyEDoAAv8OQc5UlEhLDQg github.com/alicebob/miniredis/v2 v2.34.0/go.mod h1:kWShP4b58T1CW0Y5dViCd5ztzrDqRWqM3nksiyXk5s8= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= @@ -115,9 +121,9 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw github.com/golang/glog v1.2.4 
h1:CNNw5U8lSiiBk7druxtSHHTsRWcxKoac6kZKm2peBBc= github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/google/cel-go v0.26.0/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= -github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.20.3 h1:oNx7IdTI936V8CQRveCjaxOiegWwvM7kqkbXTpyiovI= github.com/google/go-containerregistry v0.20.3/go.mod h1:w00pIgBRDVUDFM6bq+Qx8lwNWK+cxgCuX1vd3PIBDNI= github.com/google/renameio v0.1.0 h1:GOZbcHa3HfsPKPlmyPyN2KEohoMXOhdMbHrvbpl2QaA= @@ -152,6 +158,7 @@ github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYW github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= @@ -166,6 +173,7 @@ github.com/matryer/moq v0.5.2 h1:b2bsanSaO6IdraaIvPBzHnqcrkkQmk1/310HdT2nNQs= github.com/matryer/moq v0.5.2/go.mod h1:W/k5PLfou4f+bzke9VPXTbfJljxoeR1tLHigsmbshmU= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/melbahja/goph v1.4.0/go.mod h1:uG+VfK2Dlhk+O32zFrRlc3kYKTlV6+BtvPWd/kK7U68= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= github.com/minio/minio-go/v7 v7.0.74 h1:fTo/XlPBTSpo3BAMshlwKL5RspXRv9us5UeHEGYCFe0= @@ -193,6 +201,7 @@ github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhM github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e h1:aoZm08cpOy4WuID//EZDgcC4zIxODThtZNPirFr42+A= +github.com/pkg/sftp v1.13.9/go.mod h1:OBN7bVXdstkFFN/gdnHPUb5TE8eb8G1Rp9wCItqjkkA= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/posthog/posthog-go v1.5.5 h1:2o3j7IrHbTIfxRtj4MPaXKeimuTYg49onNzNBZbwksM= @@ -209,6 +218,7 @@ github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/protocolbuffers/protoscope v0.0.0-20221109213918-8e7a6aafa2c9/go.mod h1:SKZx6stCn03JN3BOWTwvVIO2ajMkb/zQdTceXYhKw/4= github.com/redis/go-redis/v9 v9.4.0 h1:Yzoz33UZw9I/mFhx4MNrB6Fk+XHO1VukNcCa1+lwyKk= github.com/redis/go-redis/v9 v9.4.0/go.mod h1:hdY0cQFCN4fnSYT6TkisLufl/4W5UIXyv0b/CLO2V2M= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= @@ -223,6 +233,7 @@ 
github.com/spf13/afero v1.10.0 h1:EaGW2JJh15aKOejeuJ+wpFSHnbd7GE6Wvp3TsNhb6LY= github.com/spf13/afero v1.10.0/go.mod h1:UBogFpq8E9Hx+xc5CNTTEpTnuHVmXDwZcZcE1eb/UhQ= github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/stoewer/go-strcase v1.3.1/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= @@ -231,8 +242,7 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg= -github.com/tidwall/sjson v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y= +github.com/tiendc/go-deepcopy v1.6.1/go.mod h1:toXoeQoUqXOOS/X4sKuiAoSk6elIdqc0pN7MTgOOo2I= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= @@ -246,9 +256,9 @@ github.com/twmb/franz-go/pkg/kmsg v1.7.0/go.mod h1:se9Mjdt0Nwzc9lnjJ0HyDtLyBnaBD github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnnbo= github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= +github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod 
h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= github.com/wundergraph/go-arena v0.0.0-20251008210416-55cb97e6f68f h1:5snewyMaIpajTu4wj22L/DgrGimICqXtUVjkZInBH3Y= github.com/wundergraph/go-arena v0.0.0-20251008210416-55cb97e6f68f/go.mod h1:ROOysEHWJjLQ8FSfNxZCziagb7Qw2nXY3/vgKRh7eWw= -github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= @@ -299,6 +309,7 @@ golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= +golang.org/x/exp v0.0.0-20250813145105-42675adae3e6/go.mod h1:4QTo5u+SEIbbKW1RacMZq1YEfOBqeXa19JeshGi+zc4= golang.org/x/image v0.6.0 h1:bR8b5okrPI3g/gyZakLZHeWxAR8Dn5CyxXv1hLH5g/4= golang.org/x/image v0.6.0/go.mod h1:MXLdDR43H7cDJq5GEGXEVeeNhPgi+YYEQ2pC1byI1x0= golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= @@ -316,6 +327,7 @@ golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= +golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/net v0.45.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= golang.org/x/oauth2 
v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70= @@ -335,6 +347,7 @@ golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 h1:LvzTn0GQhWuvKH/kVRS3R3bVAsdQWI7hvfLHGgh9+lU= @@ -345,14 +358,13 @@ golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= -golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= -golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= 
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= @@ -367,10 +379,16 @@ gonum.org/v1/plot v0.10.1 h1:dnifSs43YJuNMDzB7v8wV64O4ABBHReuAVAoBxqBqS4= gonum.org/v1/plot v0.10.1/go.mod h1:VZW5OlhkL1mysU9vaqNHnsy86inf6Ot+jB3r+BczCEo= google.golang.org/genproto/googleapis/api v0.0.0-20250106144421-5f5ef82da422 h1:GVIKPyP/kLIyVOgOnTwFOrvQaQUzOzGMCxgFUOEmm24= google.golang.org/genproto/googleapis/api v0.0.0-20250106144421-5f5ef82da422/go.mod h1:b6h1vNKhxaSoEI+5jc3PJUCustfli/mRab7295pY7rw= +google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a/go.mod h1:y2yVLIE/CSMCPXaHnSKXxu1spLPnglFLegmgdY23uuE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250811230008-5f3141c8851a h1:tPE/Kp+x9dMSwUm/uM0JKK0IfdiJkwAbSMSeZBXXJXc= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= diff --git a/v2/go.mod b/v2/go.mod index ad5d096fc1..6b7355abbf 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -44,6 +44,7 @@ require ( ) require ( + buf.build/go/hyperpb v0.1.3 // indirect 
github.com/agnivade/levenshtein v1.2.1 // indirect github.com/bitfield/gotestdox v0.2.2 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect @@ -70,6 +71,7 @@ require ( github.com/sosodev/duration v1.3.1 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect + github.com/timandy/routine v1.1.5 // indirect github.com/urfave/cli/v2 v2.27.7 // indirect github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect go.uber.org/multierr v1.11.0 // indirect @@ -77,7 +79,7 @@ require ( golang.org/x/net v0.46.0 // indirect golang.org/x/term v0.36.0 // indirect golang.org/x/tools v0.38.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250811230008-5f3141c8851a // indirect gopkg.in/cenkalti/backoff.v1 v1.1.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/v2/go.sum b/v2/go.sum index 13adfeb881..b49dfba9b2 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -1,3 +1,5 @@ +buf.build/go/hyperpb v0.1.3 h1:wiw2F7POvAe2VA2kkB0TAsFwj91lXbFrKM41D3ZgU1w= +buf.build/go/hyperpb v0.1.3/go.mod h1:IHXAM5qnS0/Fsnd7/HGDghFNvUET646WoHmq1FDZXIE= github.com/99designs/gqlgen v0.17.76 h1:YsJBcfACWmXWU2t1yCjoGdOmqcTfOFpjbLAE443fmYI= github.com/99designs/gqlgen v0.17.76/go.mod h1:miiU+PkAnTIDKMQ1BseUOIVeQHoiwYDZGCswoxl7xec= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -137,6 +139,8 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg= github.com/tidwall/sjson v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y= +github.com/timandy/routine v1.1.5 h1:LSpm7Iijwb9imIPlucl4krpr2EeCeAUvifiQ9Uf5X+M= 
+github.com/timandy/routine v1.1.5/go.mod h1:kXslgIosdY8LW0byTyPnenDgn4/azt2euufAq9rK51w= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/vektah/gqlparser/v2 v2.5.30 h1:EqLwGAFLIzt1wpx1IPpY67DwUujF1OfzgEyDsLrN6kE= @@ -214,6 +218,7 @@ gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0= gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU= google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f h1:OxYkA3wjPsZyBylwymxSHa7ViiW1Sml4ToBrncvFehI= google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f/go.mod h1:+2Yz8+CLJbIfL9z73EW45avw8Lmge3xVElCP9zEKi50= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250811230008-5f3141c8851a/go.mod h1:gw1tLEfykwDz2ET4a12jcXt4couGAm7IwsVaTy0Sflo= google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0= google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw= google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go new file mode 100644 index 0000000000..cb8f3a78ee --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2.go @@ -0,0 +1,423 @@ +package grpcdatasource + +import ( + "context" + "fmt" + "net/http" + "strings" + "sync" + + "buf.build/go/hyperpb" + "github.com/cespare/xxhash/v2" + "github.com/tidwall/gjson" + "golang.org/x/sync/errgroup" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + protoref "google.golang.org/protobuf/reflect/protoreflect" + + "github.com/wundergraph/astjson" + "github.com/wundergraph/go-arena" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/datasource/httpclient" + 
"github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" + "github.com/wundergraph/graphql-go-tools/v2/pkg/internal/unsafebytes" +) + +var _ resolve.DataSource = (*DataSourceV2)(nil) +var _ resolve.NativeDataSource = (*DataSourceV2)(nil) +var _ resolve.NativeMergeDataSource = (*DataSourceV2)(nil) + +type DataSourceV2 struct { + program *v2Program + schema *v2SchemaRuntime + fallback *DataSource + responseFrame sync.Pool + valuePool *arena.Pool + hyperpbShared sync.Pool + callOptions []grpc.CallOption +} + +func NewDataSourceV2(client grpc.ClientConnInterface, config DataSourceConfig) (*DataSourceV2, error) { + fallback, err := NewDataSource(client, config) + if err != nil { + return nil, err + } + + schema, err := newV2SchemaRuntime(config.Compiler) + if err != nil { + return nil, err + } + + program, err := compileV2Program(fallback.plan, schema, config.Compiler) + if err != nil { + return nil, err + } + + ds := &DataSourceV2{ + program: program, + schema: schema, + fallback: fallback, + valuePool: arena.NewArenaPool(), + } + ds.responseFrame.New = func() any { + return newV2ResponseFrameBuilder() + } + ds.hyperpbShared.New = func() any { + return new(hyperpb.Shared) + } + ds.callOptions = []grpc.CallOption{grpc.ForceCodec(v2HyperpbCodec{})} + + return ds, nil +} + +func (d *DataSourceV2) Load(ctx context.Context, headers http.Header, input []byte) ([]byte, error) { + value, cleanup, err := d.LoadValue(ctx, headers, input) + if cleanup != nil { + defer cleanup() + } + if err != nil { + return nil, err + } + return value.MarshalTo(nil), nil +} + +func (d *DataSourceV2) LoadResult(ctx context.Context, headers http.Header, input []byte) (resolve.NativeMergeResult, func(), error) { + if !d.program.nativeOperation || d.fallback.disabled { + return nil, nil, nil + } + + variables := gjson.Parse(unsafebytes.BytesToString(input)).Get("body.variables") + builder := newJSONBuilder(nil, d.fallback.mapping, variables) + response := d.acquireResponseFrame() + var 
shareds []*hyperpb.Shared + cleanup := func() { + for _, shared := range shareds { + d.releaseHyperpbShared(shared) + } + d.releaseResponseFrame(response) + } + + if len(headers) > 0 { + pairs := make([]string, 0, len(headers)*2) + for headerName, headerValues := range headers { + headerName = strings.ToLower(headerName) + for _, v := range headerValues { + pairs = append(pairs, headerName, v) + } + } + ctx = metadata.AppendToOutgoingContext(ctx, pairs...) + } + + root := response.newObject() + outputs := make(map[int]protoref.Message, len(d.program.stages)) + for _, stage := range d.program.stages { + results := make([]v2NativeResponse, len(stage.fetches)) + errGrp, errGrpCtx := errgroup.WithContext(ctx) + + for index := range stage.fetches { + fetch := stage.fetches[index] + errGrp.Go(func() error { + var ( + request any + skip bool + err error + shared *hyperpb.Shared + ) + + if fetch.kind == CallKindResolve { + var dependencyOutput protoref.Message + if len(fetch.dependencies) > 0 { + dependencyOutput = outputs[fetch.dependencies[0]] + } + request, skip, err = fetch.request.buildWithDependency(variables, dependencyOutput, d.schema, d.fallback.rc) + } else { + request, err = fetch.request.buildInput(variables, d.schema, d.fallback.rc) + } + if err != nil { + return err + } + if skip { + results[index] = v2NativeResponse{ + kind: fetch.kind, + responsePath: fetch.responsePath, + skip: true, + } + return nil + } + + if fetch.response.message.generatedType == nil && fetch.response.message.hyperType != nil { + shared = d.acquireHyperpbShared() + } + responseMessage := fetch.response.message.newDecodeMessage(shared) + requestArg := request + if message, ok := request.(protoref.Message); ok { + requestArg = message.Interface() + } + if err := d.fallback.cc.Invoke(errGrpCtx, "/"+fetch.serviceName+"/"+fetch.methodName, requestArg, responseMessage.Interface(), d.callOptions...); err != nil { + if shared != nil { + d.releaseHyperpbShared(shared) + } + return err + } + if 
fetch.kind == CallKindEntity { + if err := fetch.response.validateFederatedOutput(builder, responseMessage); err != nil { + if shared != nil { + d.releaseHyperpbShared(shared) + } + return err + } + } + + results[index] = v2NativeResponse{ + kind: fetch.kind, + responsePath: fetch.responsePath, + output: responseMessage, + shared: shared, + } + return nil + }) + } + + if err := errGrp.Wait(); err != nil { + cleanup() + return nil, nil, nil + } + + for index, result := range results { + if result.skip { + continue + } + + outputs[stage.fetches[index].id] = result.output + if result.shared != nil { + shareds = append(shareds, result.shared) + } + + if err := stage.fetches[index].response.attach(builder, response, root, result.output, result.kind, result.responsePath); err != nil { + cleanup() + return nil, nil, nil + } + } + } + + return &v2NativeMergeResult{frame: response, root: root}, cleanup, nil +} + +func (d *DataSourceV2) LoadWithFilesResult(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (resolve.NativeMergeResult, func(), error) { + return nil, nil, nil +} + +func (d *DataSourceV2) LoadValue(ctx context.Context, headers http.Header, input []byte) (*astjson.Value, func(), error) { + if !d.program.nativeOperation { + return d.parseFallbackBytes(ctx, headers, input) + } + + variables := gjson.Parse(unsafebytes.BytesToString(input)).Get("body.variables") + builder := newJSONBuilder(nil, d.fallback.mapping, variables) + response := d.acquireResponseFrame() + item := d.valuePool.Acquire(xxhash.Sum64(input)) + var shareds []*hyperpb.Shared + cleanup := func() { + for _, shared := range shareds { + d.releaseHyperpbShared(shared) + } + d.releaseResponseFrame(response) + d.valuePool.Release(item) + } + + if d.fallback.disabled { + value, err := astjson.ParseBytesWithArena(item.Arena, builder.writeErrorBytes(fmt.Errorf("gRPC datasource needs to be enabled to be used"))) + if err != nil { + cleanup() + return nil, nil, err + } + 
return value, cleanup, nil + } + + if len(headers) > 0 { + pairs := make([]string, 0, len(headers)*2) + for headerName, headerValues := range headers { + headerName = strings.ToLower(headerName) + for _, v := range headerValues { + pairs = append(pairs, headerName, v) + } + } + ctx = metadata.AppendToOutgoingContext(ctx, pairs...) + } + + root := response.newObject() + outputs := make(map[int]protoref.Message, len(d.program.stages)) + for _, stage := range d.program.stages { + results := make([]v2NativeResponse, len(stage.fetches)) + errGrp, errGrpCtx := errgroup.WithContext(ctx) + + for index := range stage.fetches { + fetch := stage.fetches[index] + errGrp.Go(func() error { + var ( + request any + skip bool + err error + shared *hyperpb.Shared + ) + + if fetch.kind == CallKindResolve { + var dependencyOutput protoref.Message + if len(fetch.dependencies) > 0 { + dependencyOutput = outputs[fetch.dependencies[0]] + } + request, skip, err = fetch.request.buildWithDependency(variables, dependencyOutput, d.schema, d.fallback.rc) + } else { + request, err = fetch.request.buildInput(variables, d.schema, d.fallback.rc) + } + if err != nil { + return err + } + if skip { + results[index] = v2NativeResponse{ + kind: fetch.kind, + responsePath: fetch.responsePath, + skip: true, + } + return nil + } + + if fetch.response.message.generatedType == nil && fetch.response.message.hyperType != nil { + shared = d.acquireHyperpbShared() + } + response := fetch.response.message.newDecodeMessage(shared) + requestArg := request + if message, ok := request.(protoref.Message); ok { + requestArg = message.Interface() + } + if err := d.fallback.cc.Invoke(errGrpCtx, "/"+fetch.serviceName+"/"+fetch.methodName, requestArg, response.Interface(), d.callOptions...); err != nil { + if shared != nil { + d.releaseHyperpbShared(shared) + } + return err + } + if fetch.kind == CallKindEntity { + if err := fetch.response.validateFederatedOutput(builder, response); err != nil { + if shared != nil { + 
d.releaseHyperpbShared(shared) + } + return err + } + } + + results[index] = v2NativeResponse{ + kind: fetch.kind, + responsePath: fetch.responsePath, + output: response, + shared: shared, + } + return nil + }) + } + + if err := errGrp.Wait(); err != nil { + value, parseErr := astjson.ParseBytesWithArena(item.Arena, builder.writeErrorBytes(err)) + if parseErr != nil { + cleanup() + return nil, nil, parseErr + } + return value, cleanup, nil + } + + for index, result := range results { + if result.skip { + continue + } + + outputs[stage.fetches[index].id] = result.output + if result.shared != nil { + shareds = append(shareds, result.shared) + } + + if err := stage.fetches[index].response.attach(builder, response, root, result.output, result.kind, result.responsePath); err != nil { + value, parseErr := astjson.ParseBytesWithArena(item.Arena, builder.writeErrorBytes(err)) + if parseErr != nil { + cleanup() + return nil, nil, parseErr + } + return value, cleanup, nil + } + } + } + + value := response.dataEnvelopeValue(item.Arena, root) + return value, cleanup, nil +} + +func (d *DataSourceV2) LoadWithFiles(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) ([]byte, error) { + return d.fallback.LoadWithFiles(ctx, headers, input, files) +} + +func (d *DataSourceV2) LoadWithFilesValue(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (*astjson.Value, func(), error) { + return d.parseFallbackBytesWithFiles(ctx, headers, input, files) +} + +func (d *DataSourceV2) acquireResponseFrame() *v2ResponseFrameBuilder { + frame, _ := d.responseFrame.Get().(*v2ResponseFrameBuilder) + if frame == nil { + frame = newV2ResponseFrameBuilder() + } + frame.reset() + return frame +} + +func (d *DataSourceV2) releaseResponseFrame(frame *v2ResponseFrameBuilder) { + if frame == nil { + return + } + frame.reset() + d.responseFrame.Put(frame) +} + +func (d *DataSourceV2) acquireHyperpbShared() *hyperpb.Shared { + shared, _ 
:= d.hyperpbShared.Get().(*hyperpb.Shared) + if shared == nil { + shared = new(hyperpb.Shared) + } + return shared +} + +func (d *DataSourceV2) releaseHyperpbShared(shared *hyperpb.Shared) { + if shared == nil { + return + } + shared.Free() + d.hyperpbShared.Put(shared) +} + +func (d *DataSourceV2) parseFallbackBytes(ctx context.Context, headers http.Header, input []byte) (*astjson.Value, func(), error) { + data, err := d.fallback.Load(ctx, headers, input) + if err != nil { + return nil, nil, err + } + item := d.valuePool.Acquire(xxhash.Sum64(input)) + value, err := astjson.ParseBytesWithArena(item.Arena, data) + if err != nil { + d.valuePool.Release(item) + return nil, nil, err + } + return value, func() { + d.valuePool.Release(item) + }, nil +} + +func (d *DataSourceV2) parseFallbackBytesWithFiles(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (*astjson.Value, func(), error) { + data, err := d.fallback.LoadWithFiles(ctx, headers, input, files) + if err != nil { + return nil, nil, err + } + item := d.valuePool.Acquire(xxhash.Sum64(input)) + value, err := astjson.ParseBytesWithArena(item.Arena, data) + if err != nil { + d.valuePool.Release(item) + return nil, nil, err + } + return value, func() { + d.valuePool.Release(item) + }, nil +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go new file mode 100644 index 0000000000..2eea3b059f --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_bench_test.go @@ -0,0 +1,404 @@ +package grpcdatasource + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/wundergraph/go-arena" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/astparser" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" + 
"github.com/wundergraph/graphql-go-tools/v2/pkg/grpctest" +) + +func Benchmark_DataSource_V1_Load(b *testing.B) { + benchmarkDataSourceVersionLoad(b, false) +} + +func Benchmark_DataSource_V2_Load(b *testing.B) { + benchmarkDataSourceVersionLoad(b, true) +} + +func Benchmark_DataSource_V2_LoadValue(b *testing.B) { + benchmarkDataSourceV2LoadValue(b, benchmarkScenarioLoad) +} + +func Benchmark_DataSource_V2_LoadResult(b *testing.B) { + benchmarkDataSourceV2LoadResult(b, benchmarkScenarioLoad) +} + +func Benchmark_DataSource_V1_Load_WithFieldArguments(b *testing.B) { + benchmarkDataSourceVersionLoadWithFieldArguments(b, false) +} + +func Benchmark_DataSource_V2_Load_WithFieldArguments(b *testing.B) { + benchmarkDataSourceVersionLoadWithFieldArguments(b, true) +} + +func Benchmark_DataSource_V2_LoadValue_WithFieldArguments(b *testing.B) { + benchmarkDataSourceV2LoadValue(b, benchmarkScenarioLoadWithFieldArguments) +} + +func Benchmark_DataSource_V2_LoadResult_WithFieldArguments(b *testing.B) { + benchmarkDataSourceV2LoadResult(b, benchmarkScenarioLoadWithFieldArguments) +} + +func Benchmark_DataSource_V1_Load_FederationFanout(b *testing.B) { + benchmarkDataSourceVersionLoadFederationFanout(b, false) +} + +func Benchmark_DataSource_V2_Load_FederationFanout(b *testing.B) { + benchmarkDataSourceVersionLoadFederationFanout(b, true) +} + +func Benchmark_DataSource_V2_LoadValue_FederationFanout(b *testing.B) { + benchmarkDataSourceV2LoadValue(b, benchmarkScenarioFederationFanout) +} + +func Benchmark_DataSource_V2_LoadResult_FederationFanout(b *testing.B) { + benchmarkDataSourceV2LoadResult(b, benchmarkScenarioFederationFanout) +} + +func Benchmark_DataSource_V1_Load_FederationRequiresUnion(b *testing.B) { + benchmarkDataSourceVersionLoadFederationRequiresUnion(b, false) +} + +func Benchmark_DataSource_V2_Load_FederationRequiresUnion(b *testing.B) { + benchmarkDataSourceVersionLoadFederationRequiresUnion(b, true) +} + +func 
Benchmark_DataSource_V2_LoadValue_FederationRequiresUnion(b *testing.B) { + benchmarkDataSourceV2LoadValue(b, benchmarkScenarioFederationRequiresUnion) +} + +func Benchmark_DataSource_V2_LoadResult_FederationRequiresUnion(b *testing.B) { + benchmarkDataSourceV2LoadResult(b, benchmarkScenarioFederationRequiresUnion) +} + +type benchmarkScenario int + +const ( + benchmarkScenarioLoad benchmarkScenario = iota + benchmarkScenarioLoadWithFieldArguments + benchmarkScenarioFederationFanout + benchmarkScenarioFederationRequiresUnion +) + +func benchmarkDataSourceVersionLoad(b *testing.B, useV2 bool) { + conn, cleanup := setupTestGRPCServer(b) + b.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(b) + query := `query ComplexFilterTypeQuery($filter: ComplexFilterTypeInput!) { complexFilterType(filter: $filter) { id name } }` + variables := `{"variables":{"filter":{"filter":{"name":"test","filterField1":"test","filterField2":"test"}}}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + if report.HasErrors() { + b.Fatalf("failed to parse query: %s", report.Error()) + } + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(b), testMapping()) + require.NoError(b, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + if useV2 { + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } + return + } + + ds, err := NewDataSource(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) 
+ } +} + +func benchmarkDataSourceVersionLoadWithFieldArguments(b *testing.B, useV2 bool) { + conn, cleanup := setupTestGRPCServer(b) + b.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(b) + query := `query CategoriesWithNullableTypes($nullType: String, $valueType: String) { categories { nullMetrics: categoryMetrics(metricType: $nullType) { id metricType value } valueMetrics: categoryMetrics(metricType: $valueType) { id metricType value } } }` + variables := `{"variables":{"nullType":"unavailable","valueType":"popularity_score"}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + if report.HasErrors() { + b.Fatalf("failed to parse query: %s", report.Error()) + } + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(b), testMapping()) + require.NoError(b, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + if useV2 { + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } + return + } + + ds, err := NewDataSource(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } +} + +func benchmarkDataSourceV2LoadValue(b *testing.B, scenario benchmarkScenario) { + conn, cleanup := setupTestGRPCServer(b) + b.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(b) + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(b), testMapping()) + require.NoError(b, err) + + query, variables, federationConfigs := benchmarkScenarioInput(scenario) + queryDoc, report 
:= astparser.ParseGraphqlDocumentString(query) + if report.HasErrors() { + b.Fatalf("failed to parse query: %s", report.Error()) + } + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: federationConfigs, + }) + require.NoError(b, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + value, release, err := ds.LoadValue(context.Background(), nil, input) + require.NoError(b, err) + require.NotNil(b, value) + release() + } +} + +func benchmarkDataSourceV2LoadResult(b *testing.B, scenario benchmarkScenario) { + conn, cleanup := setupTestGRPCServer(b) + b.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(b) + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(b), testMapping()) + require.NoError(b, err) + + query, variables, federationConfigs := benchmarkScenarioInput(scenario) + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + if report.HasErrors() { + b.Fatalf("failed to parse query: %s", report.Error()) + } + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: federationConfigs, + }) + require.NoError(b, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + mergePool := arena.NewArenaPool() + b.ReportAllocs() + b.ResetTimer() + for i := 0; b.Loop(); i++ { + result, release, err := ds.LoadResult(context.Background(), nil, input) + require.NoError(b, err) + require.NotNil(b, result) + mergeItem := mergePool.Acquire(uint64(i + 1)) + merged, err := result.MergeInto(mergeItem.Arena, nil, resolve.PostProcessingConfiguration{SelectResponseDataPath: []string{"data"}}, nil) + require.NoError(b, err) + require.NotNil(b, merged) + release() + 
mergePool.Release(mergeItem) + } +} + +func benchmarkDataSourceVersionLoadFederationFanout(b *testing.B, useV2 bool) { + conn, cleanup := setupTestGRPCServer(b) + b.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(b) + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(b), testMapping()) + require.NoError(b, err) + + query, variables, federationConfigs := benchmarkScenarioInput(benchmarkScenarioFederationFanout) + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + if report.HasErrors() { + b.Fatalf("failed to parse query: %s", report.Error()) + } + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + if useV2 { + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: federationConfigs, + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } + return + } + + ds, err := NewDataSource(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: federationConfigs, + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } +} + +func benchmarkDataSourceVersionLoadFederationRequiresUnion(b *testing.B, useV2 bool) { + conn, cleanup := setupTestGRPCServer(b) + b.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(b) + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(b), testMapping()) + require.NoError(b, err) + + query, variables, federationConfigs := benchmarkScenarioInput(benchmarkScenarioFederationRequiresUnion) + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + if report.HasErrors() { + b.Fatalf("failed to parse 
query: %s", report.Error()) + } + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + if useV2 { + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: federationConfigs, + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } + return + } + + ds, err := NewDataSource(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: federationConfigs, + }) + require.NoError(b, err) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + _, err = ds.Load(context.Background(), nil, input) + require.NoError(b, err) + } +} + +func benchmarkScenarioInput(scenario benchmarkScenario) (query, variables string, federationConfigs plan.FederationFieldConfigurations) { + switch scenario { + case benchmarkScenarioLoad: + return `query ComplexFilterTypeQuery($filter: ComplexFilterTypeInput!) { complexFilterType(filter: $filter) { id name } }`, + `{"variables":{"filter":{"filter":{"name":"test","filterField1":"test","filterField2":"test"}}}}`, + nil + case benchmarkScenarioLoadWithFieldArguments: + return `query CategoriesWithNullableTypes($nullType: String, $valueType: String) { categories { nullMetrics: categoryMetrics(metricType: $nullType) { id metricType value } valueMetrics: categoryMetrics(metricType: $valueType) { id metricType value } } }`, + `{"variables":{"nullType":"unavailable","valueType":"popularity_score"}}`, + nil + case benchmarkScenarioFederationFanout: + return `query($representations: [_Any!]!, $input: ShippingEstimateInput!) 
{ _entities(representations: $representations) { ...on Product { id name price shippingEstimate(input: $input) } } }`, + `{"variables":{"representations":[{"__typename":"Product","id":"1"},{"__typename":"Product","id":"2"},{"__typename":"Product","id":"3"}],"input":{"destination":"INTERNATIONAL","weight":10.0,"expedited":true}}}`, + plan.FederationFieldConfigurations{ + { + TypeName: "Product", + SelectionSet: "id", + }, + } + case benchmarkScenarioFederationRequiresUnion: + return `query($representations: [_Any!]!, $checkHealth: Boolean!) { _entities(representations: $representations) { ...on Storage { __typename id tagSummary storageStatus(checkHealth: $checkHealth) { ... on ActionSuccess { message timestamp } ... on ActionError { message code } } } } }`, + `{"variables":{"representations":[{"__typename":"Storage","id":"1","tags":["electronics","gadgets","sale"]},{"__typename":"Storage","id":"2","tags":["books","fiction"]},{"__typename":"Storage","id":"3","tags":[]}],"checkHealth":true}}`, + plan.FederationFieldConfigurations{ + { + TypeName: "Storage", + SelectionSet: "id", + }, + { + TypeName: "Storage", + FieldName: "tagSummary", + SelectionSet: "tags", + }, + } + default: + panic("unsupported benchmark scenario") + } +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go new file mode 100644 index 0000000000..d1ba235526 --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_compile.go @@ -0,0 +1,462 @@ +package grpcdatasource + +import ( + "fmt" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" +) + +func compileV2Program(plan *RPCExecutionPlan, schema *v2SchemaRuntime, compiler *RPCCompiler) (*v2Program, error) { + stageIndexes, err := compileV2StageIndexes(plan) + if err != nil { + return nil, err + } + + stageCount := 0 + for _, idx := range stageIndexes { + if idx+1 > stageCount { + stageCount = idx + 1 + } + } + + 
stageMap := make(map[int][]v2Fetch, stageCount) + program := &v2Program{ + stages: make([]v2Stage, 0, stageCount), + } + + for i := range plan.Calls { + call := &plan.Calls[i] + fetch, err := compileV2Fetch(plan, call, schema, compiler) + if err != nil { + return nil, err + } + stageMap[stageIndexes[call.ID]] = append(stageMap[stageIndexes[call.ID]], fetch) + if !fetch.native { + program.requiresFallback = true + program.fallbackReasons = append(program.fallbackReasons, fmt.Sprintf("call %d (%s): %s", call.ID, call.MethodName, fetch.fallbackReason)) + } + } + + for i := 0; i < stageCount; i++ { + program.stages = append(program.stages, v2Stage{fetches: stageMap[i]}) + } + + program.nativeOperation = !program.requiresFallback + return program, nil +} + +func compileV2Fetch(plan *RPCExecutionPlan, call *RPCCall, schema *v2SchemaRuntime, compiler *RPCCompiler) (v2Fetch, error) { + serviceName, ok := compiler.resolveServiceName(call) + if !ok { + return v2Fetch{}, fmt.Errorf("failed to resolve service name for method %s", call.MethodName) + } + + fetch := v2Fetch{ + id: call.ID, + kind: call.Kind, + dependencies: append([]int(nil), call.DependentCalls...), + serviceName: serviceName, + methodName: call.MethodName, + responsePath: call.ResponsePath, + } + + requestRuntime, ok := schema.messageRuntime(call.Request.Name) + if !ok { + fetch.fallbackReason = "request runtime missing" + return fetch, nil + } + + responseRuntime, ok := schema.messageRuntime(call.Response.Name) + if !ok { + fetch.fallbackReason = "response runtime missing" + return fetch, nil + } + + var ( + requestProgram *v2RequestProgram + err error + ) + switch call.Kind { + case CallKindStandard, CallKindEntity, CallKindRequired: + if len(call.DependentCalls) > 0 { + fetch.fallbackReason = "dependent standard/entity/required fetches are routed through v1" + return fetch, nil + } + requestProgram, err = compileV2RequestProgram(requestRuntime, &call.Request) + case CallKindResolve: + if 
len(call.DependentCalls) != 1 { + fetch.fallbackReason = "resolve fetches require exactly one dependency" + return fetch, nil + } + dependencyCall := &plan.Calls[call.DependentCalls[0]] + dependencyRuntime, ok := schema.messageRuntime(dependencyCall.Response.Name) + if !ok { + fetch.fallbackReason = "resolve dependency runtime missing" + return fetch, nil + } + requestProgram, err = compileV2ResolveRequestProgram(requestRuntime, &call.Request, dependencyRuntime) + default: + fetch.fallbackReason = "fetch kind is routed through v1" + return fetch, nil + } + if err != nil { + fetch.fallbackReason = err.Error() + return fetch, nil + } + + responseProgram, err := compileV2ResponseProgram(schema, responseRuntime, &call.Response) + if err != nil { + fetch.fallbackReason = err.Error() + return fetch, nil + } + + fetch.request = requestProgram + fetch.response = responseProgram + fetch.native = true + return fetch, nil +} + +func compileV2RequestProgram(runtime *v2MessageRuntime, message *RPCMessage) (*v2RequestProgram, error) { + if message == nil { + return nil, fmt.Errorf("request rpc message is nil") + } + if message.IsOneOf() { + return nil, fmt.Errorf("oneof request messages are not yet supported natively") + } + + program := &v2RequestProgram{ + message: runtime, + fields: make([]v2RequestFieldProgram, 0, len(message.Fields)), + } + + for i := range message.Fields { + rpcField := &message.Fields[i] + fieldRuntime, ok := runtime.fieldsByName[rpcField.Name] + if !ok { + continue + } + + if rpcField.IsListType { + return nil, fmt.Errorf("list wrapper request fields are not yet supported natively") + } + + fieldProgram := v2RequestFieldProgram{ + runtime: fieldRuntime, + jsonPath: rpcField.JSONPath, + staticValue: rpcField.StaticValue, + enumName: rpcField.EnumName, + optional: rpcField.Optional, + repeated: rpcField.Repeated, + } + + if rpcField.IsOptionalScalar() { + if !fieldRuntime.isMessage || fieldRuntime.message == nil { + return nil, fmt.Errorf("optional scalar 
wrapper field %s is missing message runtime", rpcField.Name) + } + wrapper := rpcField.ToOptionalTypeMessage(fieldRuntime.message.name) + wrapper.Fields[0].JSONPath = "" + child, err := compileV2RequestProgram(fieldRuntime.message, wrapper) + if err != nil { + return nil, err + } + fieldProgram.child = child + } else if fieldRuntime.isMessage { + if rpcField.Message == nil { + return nil, fmt.Errorf("message field %s has no child rpc message", rpcField.Name) + } + child, err := compileV2RequestProgram(fieldRuntime.message, rpcField.Message) + if err != nil { + return nil, err + } + fieldProgram.child = child + } + + program.fields = append(program.fields, fieldProgram) + } + + if wire, ok := compileV2WirePlan(program); ok { + program.wire = wire + } + + return program, nil +} + +func compileV2ResolveRequestProgram(runtime *v2MessageRuntime, message *RPCMessage, dependencyRuntime *v2MessageRuntime) (*v2RequestProgram, error) { + if message == nil { + return nil, fmt.Errorf("request rpc message is nil") + } + if message.IsOneOf() { + return nil, fmt.Errorf("oneof request messages are not yet supported natively") + } + + program := &v2RequestProgram{ + message: runtime, + fields: make([]v2RequestFieldProgram, 0, len(message.Fields)), + } + + for i := range message.Fields { + rpcField := &message.Fields[i] + fieldRuntime, ok := runtime.fieldsByName[rpcField.Name] + if !ok { + continue + } + + switch rpcField.Name { + case "context": + contextProgram, err := compileV2ContextProgram(fieldRuntime, rpcField, dependencyRuntime) + if err != nil { + return nil, err + } + program.context = contextProgram + default: + fieldProgram, err := compileV2RequestFieldProgram(fieldRuntime, rpcField) + if err != nil { + return nil, err + } + program.fields = append(program.fields, fieldProgram) + } + } + + if program.context == nil { + return nil, fmt.Errorf("resolve request message %s is missing a context program", message.Name) + } + + return program, nil +} + +func 
compileV2RequestFieldProgram(fieldRuntime *v2FieldRuntime, rpcField *RPCField) (v2RequestFieldProgram, error) { + if rpcField.IsListType { + return v2RequestFieldProgram{}, fmt.Errorf("list wrapper request fields are not yet supported natively") + } + fieldProgram := v2RequestFieldProgram{ + runtime: fieldRuntime, + jsonPath: rpcField.JSONPath, + staticValue: rpcField.StaticValue, + enumName: rpcField.EnumName, + optional: rpcField.Optional, + repeated: rpcField.Repeated, + } + + if rpcField.IsOptionalScalar() { + if !fieldRuntime.isMessage || fieldRuntime.message == nil { + return v2RequestFieldProgram{}, fmt.Errorf("optional scalar wrapper field %s is missing message runtime", rpcField.Name) + } + wrapper := rpcField.ToOptionalTypeMessage(fieldRuntime.message.name) + wrapper.Fields[0].JSONPath = "" + child, err := compileV2RequestProgram(fieldRuntime.message, wrapper) + if err != nil { + return v2RequestFieldProgram{}, err + } + fieldProgram.child = child + } else if fieldRuntime.isMessage { + if rpcField.Message == nil { + return v2RequestFieldProgram{}, fmt.Errorf("message field %s has no child rpc message", rpcField.Name) + } + child, err := compileV2RequestProgram(fieldRuntime.message, rpcField.Message) + if err != nil { + return v2RequestFieldProgram{}, err + } + fieldProgram.child = child + } + + return fieldProgram, nil +} + +func compileV2ContextProgram(fieldRuntime *v2FieldRuntime, rpcField *RPCField, dependencyRuntime *v2MessageRuntime) (*v2ContextProgram, error) { + if !fieldRuntime.repeated || !fieldRuntime.isMessage { + return nil, fmt.Errorf("resolve context field %s must be a repeated message", rpcField.Name) + } + if rpcField.Message == nil { + return nil, fmt.Errorf("resolve context field %s has no message definition", rpcField.Name) + } + + program := &v2ContextProgram{ + runtime: fieldRuntime, + message: fieldRuntime.message, + fields: make([]v2ContextFieldProgram, 0, len(rpcField.Message.Fields)), + } + + for i := range rpcField.Message.Fields 
{ + contextField := &rpcField.Message.Fields[i] + contextRuntime, ok := fieldRuntime.message.fieldsByName[contextField.Name] + if !ok { + return nil, fmt.Errorf("resolve context field runtime missing for %s", contextField.Name) + } + + pathProgram, err := compileV2ResolvePathProgram(dependencyRuntime, contextField.ResolvePath) + if err != nil { + return nil, err + } + + program.fields = append(program.fields, v2ContextFieldProgram{ + runtime: contextRuntime, + path: pathProgram, + }) + } + + return program, nil +} + +func compileV2ResolvePathProgram(runtime *v2MessageRuntime, path ast.Path) (v2ResolvePathProgram, error) { + if path.Len() == 0 { + return v2ResolvePathProgram{}, fmt.Errorf("resolve path is empty") + } + + program := v2ResolvePathProgram{ + steps: make([]v2ResolvePathStep, 0, path.Len()), + } + + current := runtime + for i := range path { + fieldName := path[i].FieldName.String() + if len(fieldName) > 0 && fieldName[0] == '@' { + return v2ResolvePathProgram{}, fmt.Errorf("resolve path %s uses nested list markers which are not yet supported natively", path.String()) + } + + fieldRuntime, ok := current.fieldsByName[fieldName] + if !ok { + return v2ResolvePathProgram{}, fmt.Errorf("resolve path field %s not found in %s", fieldName, current.name) + } + + program.steps = append(program.steps, v2ResolvePathStep{runtime: fieldRuntime}) + if i < len(path)-1 { + if !fieldRuntime.isMessage { + return v2ResolvePathProgram{}, fmt.Errorf("resolve path %s terminates early on scalar field %s", path.String(), fieldName) + } + current = fieldRuntime.message + } + } + + return program, nil +} + +func compileV2ResponseProgram(schema *v2SchemaRuntime, runtime *v2MessageRuntime, message *RPCMessage) (*v2ResponseProgram, error) { + if message == nil { + return nil, fmt.Errorf("response rpc message is nil") + } + + program := &v2ResponseProgram{ + message: runtime, + fields: make([]v2ResponseFieldProgram, 0, len(message.Fields)), + oneOfType: message.OneOfType, + } + + if 
len(message.FragmentFields) > 0 { + program.fragments = make(map[string]*v2ResponseProgram, len(message.FragmentFields)) + for typeName, fragmentFields := range message.FragmentFields { + fragmentRuntime, ok := schema.messageRuntime(typeName) + if !ok { + return nil, fmt.Errorf("response fragment runtime missing for %s", typeName) + } + fragmentProgram, err := compileV2ResponseProgram(schema, fragmentRuntime, &RPCMessage{ + Name: typeName, + Fields: fragmentFields, + }) + if err != nil { + return nil, err + } + program.fragments[typeName] = fragmentProgram + } + } + + for i := range message.Fields { + rpcField := &message.Fields[i] + + fieldProgram := v2ResponseFieldProgram{ + name: rpcField.AliasOrPath(), + staticValue: rpcField.StaticValue, + enumName: rpcField.EnumName, + repeated: rpcField.Repeated, + scalarType: rpcField.ProtoTypeName, + } + + if rpcField.StaticValue != "" { + program.fields = append(program.fields, fieldProgram) + continue + } + + fieldRuntime, ok := runtime.fieldsByName[rpcField.Name] + if !ok { + continue + } + fieldProgram.runtime = fieldRuntime + + if rpcField.IsListType { + return nil, fmt.Errorf("list wrapper response fields are not yet supported natively") + } + if rpcField.IsOptionalScalar() { + return nil, fmt.Errorf("optional scalar wrapper response fields are not yet supported natively") + } + if rpcField.JSONPath == "" { + return nil, fmt.Errorf("flattened response fields are not yet supported natively") + } + if fieldRuntime.dataType == DataTypeEnum { + return nil, fmt.Errorf("enum response fields are not yet supported natively") + } + + if fieldRuntime.isMessage { + if rpcField.Message == nil { + return nil, fmt.Errorf("message field %s has no child rpc message", rpcField.Name) + } + child, err := compileV2ResponseProgram(schema, fieldRuntime.message, rpcField.Message) + if err != nil { + return nil, err + } + fieldProgram.child = child + } + + program.fields = append(program.fields, fieldProgram) + } + + return program, nil +} 
+ +func compileV2StageIndexes(plan *RPCExecutionPlan) ([]int, error) { + stageIndexes := initializeSlice(len(plan.Calls), -1) + cycleChecks := make([]bool, len(plan.Calls)) + + var visit func(index int) error + visit = func(index int) error { + if cycleChecks[index] { + return fmt.Errorf("cycle detected") + } + cycleChecks[index] = true + + call := &plan.Calls[index] + if len(call.DependentCalls) == 0 { + stageIndexes[index] = 0 + return nil + } + + currentLevel := 0 + for _, dep := range call.DependentCalls { + if dep < 0 || dep >= len(plan.Calls) { + return fmt.Errorf("unable to find dependent call %d in execution plan", dep) + } + if level := stageIndexes[dep]; level >= 0 { + if level > currentLevel { + currentLevel = level + } + continue + } + if err := visit(dep); err != nil { + return err + } + if level := stageIndexes[dep]; level > currentLevel { + currentLevel = level + } + } + + stageIndexes[index] = currentLevel + 1 + return nil + } + + for i := range plan.Calls { + if err := visit(i); err != nil { + return nil, err + } + clear(cycleChecks) + } + + return stageIndexes, nil +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go new file mode 100644 index 0000000000..907e6312d3 --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_frame.go @@ -0,0 +1,386 @@ +package grpcdatasource + +import ( + "fmt" + "strconv" + + "github.com/wundergraph/astjson" + "github.com/wundergraph/go-arena" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +type v2ResponseFrameKind uint8 + +const ( + v2ResponseFrameKindNull v2ResponseFrameKind = iota + v2ResponseFrameKindObject + v2ResponseFrameKindArray + v2ResponseFrameKindString + v2ResponseFrameKindNumber + v2ResponseFrameKindBool +) + +type v2ResponseFrameBuilder struct { + nodes []v2ResponseFrameNode + buffer []byte +} + 
+type v2ResponseFrameNode struct { + kind v2ResponseFrameKind + boolValue bool + stringValue string + objectFields []v2ResponseFrameField + arrayValues []int +} + +type v2ResponseFrameField struct { + name string + value int +} + +type v2NativeMergeResult struct { + frame *v2ResponseFrameBuilder + root int +} + +func newV2ResponseFrameBuilder() *v2ResponseFrameBuilder { + return &v2ResponseFrameBuilder{ + nodes: make([]v2ResponseFrameNode, 0, 32), + } +} + +func (b *v2ResponseFrameBuilder) reset() { + for i := range b.nodes { + b.nodes[i].boolValue = false + b.nodes[i].stringValue = "" + b.nodes[i].objectFields = b.nodes[i].objectFields[:0] + b.nodes[i].arrayValues = b.nodes[i].arrayValues[:0] + } + b.nodes = b.nodes[:0] + b.buffer = b.buffer[:0] +} + +func (b *v2ResponseFrameBuilder) newObject() int { + return b.newNode(v2ResponseFrameNode{kind: v2ResponseFrameKindObject}) +} + +func (b *v2ResponseFrameBuilder) newArray() int { + return b.newNode(v2ResponseFrameNode{kind: v2ResponseFrameKindArray}) +} + +func (b *v2ResponseFrameBuilder) newString(value string) int { + return b.newNode(v2ResponseFrameNode{kind: v2ResponseFrameKindString, stringValue: value}) +} + +func (b *v2ResponseFrameBuilder) newNumber(value string) int { + return b.newNode(v2ResponseFrameNode{kind: v2ResponseFrameKindNumber, stringValue: value}) +} + +func (b *v2ResponseFrameBuilder) newBool(value bool) int { + return b.newNode(v2ResponseFrameNode{kind: v2ResponseFrameKindBool, boolValue: value}) +} + +func (b *v2ResponseFrameBuilder) newNull() int { + return b.newNode(v2ResponseFrameNode{kind: v2ResponseFrameKindNull}) +} + +func (b *v2ResponseFrameBuilder) newNode(node v2ResponseFrameNode) int { + index := len(b.nodes) + if index < cap(b.nodes) { + b.nodes = b.nodes[:index+1] + existing := &b.nodes[index] + objectFields := existing.objectFields[:0] + arrayValues := existing.arrayValues[:0] + *existing = node + if node.kind == v2ResponseFrameKindObject { + existing.objectFields = objectFields 
+ } + if node.kind == v2ResponseFrameKindArray { + existing.arrayValues = arrayValues + } + return index + } + + b.nodes = append(b.nodes, node) + return index +} + +func (b *v2ResponseFrameBuilder) setObjectField(objectIndex int, name string, valueIndex int) { + object := &b.nodes[objectIndex] + for i := range object.objectFields { + if object.objectFields[i].name == name { + object.objectFields[i].value = valueIndex + return + } + } + object.objectFields = append(object.objectFields, v2ResponseFrameField{ + name: name, + value: valueIndex, + }) +} + +func (b *v2ResponseFrameBuilder) getObjectField(objectIndex int, name string) (int, bool) { + object := &b.nodes[objectIndex] + for i := range object.objectFields { + if object.objectFields[i].name == name { + return object.objectFields[i].value, true + } + } + return 0, false +} + +func (b *v2ResponseFrameBuilder) appendArrayItem(arrayIndex int, valueIndex int) { + array := &b.nodes[arrayIndex] + array.arrayValues = append(array.arrayValues, valueIndex) +} + +func (b *v2ResponseFrameBuilder) marshalDataEnvelope(root int) []byte { + b.buffer = b.buffer[:0] + b.buffer = append(b.buffer, `{"data":`...) 
+ b.buffer = b.appendNodeJSON(b.buffer, root) + b.buffer = append(b.buffer, '}') + out := b.buffer + b.buffer = nil + return out +} + +func (b *v2ResponseFrameBuilder) dataEnvelopeValue(a arena.Arena, root int) *astjson.Value { + data := astjson.ObjectValue(a) + data.Set(a, "data", b.nodeValue(a, root)) + return data +} + +func (b *v2ResponseFrameBuilder) nodeValue(a arena.Arena, nodeIndex int) *astjson.Value { + node := &b.nodes[nodeIndex] + switch node.kind { + case v2ResponseFrameKindNull: + return astjson.NullValue + case v2ResponseFrameKindObject: + obj := astjson.ObjectValue(a) + for i := range node.objectFields { + obj.Set(a, node.objectFields[i].name, b.nodeValue(a, node.objectFields[i].value)) + } + return obj + case v2ResponseFrameKindArray: + arr := astjson.ArrayValue(a) + for i := range node.arrayValues { + arr.SetArrayItem(a, i, b.nodeValue(a, node.arrayValues[i])) + } + return arr + case v2ResponseFrameKindString: + return astjson.StringValue(a, node.stringValue) + case v2ResponseFrameKindNumber: + return astjson.NumberValue(a, node.stringValue) + case v2ResponseFrameKindBool: + if node.boolValue { + return astjson.TrueValue(a) + } + return astjson.FalseValue(a) + default: + panic(fmt.Sprintf("unsupported response frame kind %d", node.kind)) + } +} + +func (b *v2ResponseFrameBuilder) appendNodeJSON(dst []byte, nodeIndex int) []byte { + node := &b.nodes[nodeIndex] + + switch node.kind { + case v2ResponseFrameKindNull: + return append(dst, "null"...) 
+ case v2ResponseFrameKindObject: + dst = append(dst, '{') + for i := range node.objectFields { + if i > 0 { + dst = append(dst, ',') + } + dst = strconv.AppendQuote(dst, node.objectFields[i].name) + dst = append(dst, ':') + dst = b.appendNodeJSON(dst, node.objectFields[i].value) + } + return append(dst, '}') + case v2ResponseFrameKindArray: + dst = append(dst, '[') + for i := range node.arrayValues { + if i > 0 { + dst = append(dst, ',') + } + dst = b.appendNodeJSON(dst, node.arrayValues[i]) + } + return append(dst, ']') + case v2ResponseFrameKindString: + return strconv.AppendQuote(dst, node.stringValue) + case v2ResponseFrameKindNumber: + return append(dst, node.stringValue...) + case v2ResponseFrameKindBool: + if node.boolValue { + return append(dst, "true"...) + } + return append(dst, "false"...) + default: + panic(fmt.Sprintf("unsupported response frame kind %d", node.kind)) + } +} + +func (b *v2ResponseFrameBuilder) flatten(nodeIndex int, path ast.Path) ([]int, error) { + if len(path) == 0 { + if b.nodes[nodeIndex].kind == v2ResponseFrameKindArray { + return append([]int(nil), b.nodes[nodeIndex].arrayValues...), nil + } + return []int{nodeIndex}, nil + } + + node := &b.nodes[nodeIndex] + switch node.kind { + case v2ResponseFrameKindObject: + next, ok := b.getObjectField(nodeIndex, path[0].FieldName.String()) + if !ok { + return nil, fmt.Errorf("response path %s not found", path.String()) + } + return b.flatten(next, path[1:]) + case v2ResponseFrameKindArray: + result := make([]int, 0, len(node.arrayValues)) + for i := range node.arrayValues { + values, err := b.flatten(node.arrayValues[i], path) + if err != nil { + return nil, err + } + result = append(result, values...) + } + return result, nil + default: + return nil, fmt.Errorf("cannot traverse response path %s through node kind %d", path.String(), node.kind) + } +} + +func (r *v2NativeMergeResult) MarshalTo(dst []byte) []byte { + return append(dst[:0], r.frame.marshalDataEnvelope(r.root)...) 
+} + +func (r *v2NativeMergeResult) MergeInto(a arena.Arena, items []*astjson.Value, post resolve.PostProcessingConfiguration, batchStats [][]*astjson.Value) (*astjson.Value, error) { + nodeIndex, ok := r.selectDataNode(post.SelectResponseDataPath) + if !ok { + return astjson.NullValue, nil + } + + if len(items) == 0 { + return r.frame.nodeValue(a, nodeIndex), nil + } + + node := &r.frame.nodes[nodeIndex] + if len(items) == 1 && batchStats == nil { + return nil, r.mergeNodeIntoItem(a, items[0], nodeIndex, post.MergePath) + } + + if node.kind != v2ResponseFrameKindArray { + return nil, fmt.Errorf("expected array response frame node, got %d", node.kind) + } + + if batchStats != nil { + if len(batchStats) != len(node.arrayValues) { + return nil, fmt.Errorf("invalid batch item count: expected %d, got %d", len(batchStats), len(node.arrayValues)) + } + for batchIndex, targets := range batchStats { + for _, target := range targets { + if err := r.mergeNodeIntoItem(a, target, node.arrayValues[batchIndex], post.MergePath); err != nil { + return nil, err + } + } + } + return nil, nil + } + + if len(items) != len(node.arrayValues) { + return nil, fmt.Errorf("invalid batch item count: expected %d, got %d", len(items), len(node.arrayValues)) + } + for i := range items { + if err := r.mergeNodeIntoItem(a, items[i], node.arrayValues[i], post.MergePath); err != nil { + return nil, err + } + } + return nil, nil +} + +func (r *v2NativeMergeResult) selectDataNode(path []string) (int, bool) { + nodeIndex := r.root + if len(path) > 0 && path[0] == "data" { + path = path[1:] + } + for _, segment := range path { + node := &r.frame.nodes[nodeIndex] + switch node.kind { + case v2ResponseFrameKindObject: + next, ok := r.frame.getObjectField(nodeIndex, segment) + if !ok { + return 0, false + } + nodeIndex = next + case v2ResponseFrameKindArray: + index, err := strconv.Atoi(segment) + if err != nil || index < 0 || index >= len(node.arrayValues) { + return 0, false + } + nodeIndex = 
node.arrayValues[index] + default: + return 0, false + } + } + return nodeIndex, true +} + +func (r *v2NativeMergeResult) mergeNodeIntoItem(a arena.Arena, target *astjson.Value, nodeIndex int, path []string) error { + if len(path) == 0 { + if r.frame.nodes[nodeIndex].kind == v2ResponseFrameKindObject && target.Type() == astjson.TypeObject { + r.mergeObjectNodeIntoObject(a, target, nodeIndex) + return nil + } + value := r.frame.nodeValue(a, nodeIndex) + _, _, err := astjson.MergeValuesWithPath(a, target, value) + return err + } + + if target.Type() != astjson.TypeObject { + value := r.frame.nodeValue(a, nodeIndex) + _, _, err := astjson.MergeValuesWithPath(a, target, value, path...) + return err + } + + parent := r.ensureObjectPath(a, target, path[:len(path)-1]) + leaf := path[len(path)-1] + existing := parent.Get(leaf) + if existing != nil && existing.Type() == astjson.TypeObject && r.frame.nodes[nodeIndex].kind == v2ResponseFrameKindObject { + r.mergeObjectNodeIntoObject(a, existing, nodeIndex) + return nil + } + parent.Set(a, leaf, r.frame.nodeValue(a, nodeIndex)) + return nil +} + +func (r *v2NativeMergeResult) ensureObjectPath(a arena.Arena, target *astjson.Value, path []string) *astjson.Value { + current := target + for _, segment := range path { + next := current.Get(segment) + if next == nil || next.Type() != astjson.TypeObject { + next = astjson.ObjectValue(a) + current.Set(a, segment, next) + } + current = next + } + return current +} + +func (r *v2NativeMergeResult) mergeObjectNodeIntoObject(a arena.Arena, target *astjson.Value, nodeIndex int) { + node := &r.frame.nodes[nodeIndex] + for i := range node.objectFields { + field := node.objectFields[i] + childNodeIndex := field.value + childNode := &r.frame.nodes[childNodeIndex] + existing := target.Get(field.name) + if existing != nil && existing.Type() == astjson.TypeObject && childNode.kind == v2ResponseFrameKindObject { + r.mergeObjectNodeIntoObject(a, existing, childNodeIndex) + continue + } + 
target.Set(a, field.name, r.frame.nodeValue(a, childNodeIndex)) + } +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_hyperpb.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_hyperpb.go new file mode 100644 index 0000000000..73807a765d --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_hyperpb.go @@ -0,0 +1,37 @@ +package grpcdatasource + +import ( + "fmt" + + "buf.build/go/hyperpb" + "google.golang.org/grpc/encoding" + "google.golang.org/protobuf/proto" +) + +type v2HyperpbCodec struct{} + +var _ encoding.Codec = v2HyperpbCodec{} + +func (v2HyperpbCodec) Name() string { return "proto" } + +func (v2HyperpbCodec) Marshal(v any) ([]byte, error) { + if msg, ok := v.(*v2PreMarshaledInput); ok { + return msg.wire, nil + } + msg, ok := v.(proto.Message) + if !ok { + return nil, fmt.Errorf("grpcdatasource v2HyperpbCodec: expected proto.Message, got %T", v) + } + return proto.Marshal(msg) +} + +func (v2HyperpbCodec) Unmarshal(data []byte, v any) error { + if msg, ok := v.(*hyperpb.Message); ok { + return msg.Unmarshal(data) + } + protoMsg, ok := v.(proto.Message) + if !ok { + return fmt.Errorf("grpcdatasource v2HyperpbCodec: expected proto.Message or *hyperpb.Message, got %T", v) + } + return proto.Unmarshal(data, protoMsg) +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go new file mode 100644 index 0000000000..51cae17aac --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_ir.go @@ -0,0 +1,93 @@ +package grpcdatasource + +import ( + "buf.build/go/hyperpb" + protoref "google.golang.org/protobuf/reflect/protoreflect" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" +) + +type v2Program struct { + stages []v2Stage + nativeOperation bool + requiresFallback bool + fallbackReasons []string +} + +type v2Stage struct { + fetches []v2Fetch +} + +type v2Fetch struct { + id int + 
kind CallKind + dependencies []int + serviceName string + methodName string + responsePath ast.Path + request *v2RequestProgram + response *v2ResponseProgram + native bool + fallbackReason string +} + +type v2RequestProgram struct { + message *v2MessageRuntime + fields []v2RequestFieldProgram + context *v2ContextProgram + wire *v2WirePlan +} + +type v2RequestFieldProgram struct { + runtime *v2FieldRuntime + jsonPath string + staticValue string + enumName string + optional bool + repeated bool + child *v2RequestProgram +} + +type v2ResponseProgram struct { + message *v2MessageRuntime + fields []v2ResponseFieldProgram + oneOfType OneOfType + fragments map[string]*v2ResponseProgram +} + +type v2ResponseFieldProgram struct { + runtime *v2FieldRuntime + name string + staticValue string + enumName string + repeated bool + child *v2ResponseProgram + scalarType DataType +} + +type v2NativeResponse struct { + kind CallKind + responsePath ast.Path + output protoref.Message + shared *hyperpb.Shared + skip bool +} + +type v2ContextProgram struct { + runtime *v2FieldRuntime + message *v2MessageRuntime + fields []v2ContextFieldProgram +} + +type v2ContextFieldProgram struct { + runtime *v2FieldRuntime + path v2ResolvePathProgram +} + +type v2ResolvePathProgram struct { + steps []v2ResolvePathStep +} + +type v2ResolvePathStep struct { + runtime *v2FieldRuntime +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go new file mode 100644 index 0000000000..3060f1552d --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_runtime.go @@ -0,0 +1,502 @@ +package grpcdatasource + +import ( + "fmt" + "strconv" + + "github.com/tidwall/gjson" + protoref "google.golang.org/protobuf/reflect/protoreflect" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" +) + +func (p *v2RequestProgram) build(data gjson.Result, schema *v2SchemaRuntime, compiler *RPCCompiler) 
(protoref.Message, error) { + msg := p.message.newMessage() + + if err := p.populateFields(msg, data, schema, compiler); err != nil { + return nil, err + } + + return msg, nil +} + +func (p *v2RequestProgram) buildInput(data gjson.Result, schema *v2SchemaRuntime, compiler *RPCCompiler) (any, error) { + if p.wire != nil { + wire, err := p.wire.execute(nil, data) + if err != nil { + return nil, err + } + return &v2PreMarshaledInput{wire: wire}, nil + } + return p.build(data, schema, compiler) +} + +func (p *v2RequestProgram) buildWithDependency(data gjson.Result, dependency protoref.Message, schema *v2SchemaRuntime, compiler *RPCCompiler) (protoref.Message, bool, error) { + msg := p.message.newMessage() + + if err := p.populateFields(msg, data, schema, compiler); err != nil { + return nil, false, err + } + + if p.context == nil { + return msg, false, nil + } + if dependency == nil || !dependency.IsValid() { + return nil, true, nil + } + + rows, err := p.context.extractRows(dependency) + if err != nil { + return nil, false, err + } + if len(rows) == 0 { + return nil, true, nil + } + + contextList := msg.Mutable(p.context.runtime.descriptorFor(msg)).List() + for _, row := range rows { + contextList.Append(protoref.ValueOfMessage(row)) + } + + return msg, false, nil +} + +func (p *v2RequestProgram) populateFields(message protoref.Message, data gjson.Result, schema *v2SchemaRuntime, compiler *RPCCompiler) error { + for i := range p.fields { + field := &p.fields[i] + fd := field.runtime.descriptorFor(message) + + if field.repeated { + elements := data.Get(field.jsonPath).Array() + if len(elements) == 0 { + continue + } + + list := message.Mutable(fd).List() + for _, element := range elements { + if field.child != nil { + childMsg, err := field.child.build(element, schema, compiler) + if err != nil { + return err + } + list.Append(protoref.ValueOfMessage(childMsg)) + continue + } + + value, err := field.inputValue(element, compiler) + if err != nil { + return err + } + 
list.Append(value) + } + continue + } + + if field.child != nil { + fieldData := data + if field.jsonPath != "" { + fieldData = data.Get(field.jsonPath) + } + if isNullValue(fieldData) { + if field.optional { + continue + } + return fmt.Errorf("field %s is required but has no value", field.jsonPath) + } + + childMsg, err := field.child.build(fieldData, schema, compiler) + if err != nil { + return err + } + message.Set(fd, protoref.ValueOfMessage(childMsg)) + continue + } + + if field.staticValue != "" { + value, err := field.inputValue(gjson.Parse(field.staticValue), compiler) + if err != nil { + return err + } + message.Set(fd, value) + continue + } + + fieldData := data.Get(field.jsonPath) + if isNullValue(fieldData) { + if field.optional { + continue + } + return fmt.Errorf("field %s is required but has no value", field.jsonPath) + } + + value, err := field.inputValue(fieldData, compiler) + if err != nil { + return err + } + message.Set(fd, value) + } + + return nil +} + +func (f *v2RequestFieldProgram) inputValue(data gjson.Result, compiler *RPCCompiler) (protoref.Value, error) { + if f.runtime.dataType == DataTypeEnum { + return compiler.getEnumValue(f.enumName, data) + } + return compiler.setValueForKind(f.runtime.dataType, data), nil +} + +func (p *v2ContextProgram) extractRows(message protoref.Message) ([]protoref.Message, error) { + rows := make([]protoref.Message, 0) + + for i := range p.fields { + field := &p.fields[i] + values := field.path.extract(message) + if len(values) == 0 { + return nil, nil + } + + if len(rows) == 0 { + rows = make([]protoref.Message, len(values)) + for index := range values { + rows[index] = p.message.newMessage() + } + } + + if len(values) != len(rows) { + return nil, fmt.Errorf("resolve context field %s produced %d values, expected %d", field.runtime.name, len(values), len(rows)) + } + + for index, value := range values { + rows[index].Set(field.runtime.descriptorFor(rows[index]), value) + } + } + + return rows, nil +} + 
+func (p v2ResolvePathProgram) extract(message protoref.Message) []protoref.Value { + if !message.IsValid() { + return nil + } + + return p.extractFromMessage(message, 0) +} + +func (p v2ResolvePathProgram) extractFromMessage(message protoref.Message, stepIndex int) []protoref.Value { + if !message.IsValid() || stepIndex >= len(p.steps) { + return nil + } + + step := p.steps[stepIndex] + fieldValue := message.Get(step.runtime.descriptorFor(message)) + if !fieldValue.IsValid() { + return nil + } + + if step.runtime.repeated { + list := fieldValue.List() + if !list.IsValid() || list.Len() == 0 { + return nil + } + + result := make([]protoref.Value, 0, list.Len()) + for i := 0; i < list.Len(); i++ { + item := list.Get(i) + if step.runtime.isMessage && stepIndex < len(p.steps)-1 { + result = append(result, p.extractFromMessage(item.Message(), stepIndex+1)...) + continue + } + result = append(result, item) + } + return result + } + + if step.runtime.isMessage { + if stepIndex == len(p.steps)-1 { + return []protoref.Value{protoref.ValueOfMessage(fieldValue.Message())} + } + return p.extractFromMessage(fieldValue.Message(), stepIndex+1) + } + + return []protoref.Value{fieldValue} +} + +func (p *v2ResponseProgram) attach(builder *jsonBuilder, frame *v2ResponseFrameBuilder, root int, data protoref.Message, kind CallKind, path ast.Path) error { + if data == nil || !data.IsValid() { + return nil + } + + switch kind { + case CallKindResolve, CallKindRequired: + return p.attachResolve(builder, frame, root, data, path) + default: + return p.applyObject(builder, frame, root, data) + } +} + +func (p *v2ResponseProgram) validateFederatedOutput(builder *jsonBuilder, data protoref.Message) error { + if len(builder.indexMap) == 0 || data == nil || !data.IsValid() { + return nil + } + + entitiesField := p.fieldByName("_entities") + if entitiesField == nil { + return nil + } + + fd := entitiesField.runtime.descriptorFor(data) + if !fd.IsList() || entitiesField.child == nil { + return 
fmt.Errorf("federated response field %s must be a repeated message", entitiesField.name) + } + + typenameField := entitiesField.child.fieldByName("__typename") + if typenameField == nil { + return fmt.Errorf("federated response field %s is missing __typename", entitiesField.name) + } + + entities := data.Get(fd).List() + entityCountPerType := make(map[string]int) + for i := 0; i < entities.Len(); i++ { + entity := entities.Get(i).Message() + if !entity.IsValid() { + continue + } + typeName := typenameField.staticValue + if typeName == "" { + if typenameField.runtime == nil { + return fmt.Errorf("federated response field %s is missing a runtime or static typename", typenameField.name) + } + typeName = entity.Get(typenameField.runtime.descriptorFor(entity)).String() + } + if typeName == "" { + continue + } + entityCountPerType[typeName]++ + } + + for typeName, count := range entityCountPerType { + expected, found := builder.indexMap[typeName] + if !found { + return fmt.Errorf("entity type %s received in the subgraph response, but was not expected", typeName) + } + if len(expected) != count { + return fmt.Errorf("entity type %s received %d entities in the subgraph response, but %d are expected", typeName, count, len(expected)) + } + } + + return nil +} + +func (p *v2ResponseProgram) applyObject(builder *jsonBuilder, frame *v2ResponseFrameBuilder, root int, data protoref.Message) error { + for i := range p.fields { + field := &p.fields[i] + value, err := field.materialize(builder, frame, data) + if err != nil { + return err + } + frame.setObjectField(root, field.name, value) + } + return nil +} + +func (p *v2ResponseProgram) attachResolve(builder *jsonBuilder, frame *v2ResponseFrameBuilder, root int, data protoref.Message, path ast.Path) error { + if len(path) == 0 { + return fmt.Errorf("resolve response path is empty") + } + if len(p.fields) != 1 { + return fmt.Errorf("resolve response requires exactly one top-level field, got %d", len(p.fields)) + } + + resultField 
:= &p.fields[0] + fd := resultField.runtime.descriptorFor(data) + if !fd.IsList() || resultField.child == nil { + return fmt.Errorf("resolve response field %s must be a repeated message", resultField.name) + } + + list := data.Get(fd).List() + if !list.IsValid() || list.Len() == 0 { + return nil + } + + searchPath := path[:len(path)-1] + elementName := path[len(path)-1].FieldName.String() + + targets, err := p.resolveAttachTargets(frame, root, searchPath) + if err != nil { + return err + } + if len(targets) != list.Len() { + return fmt.Errorf("length of values doesn't match the length of the result array, expected %d, got %d", len(targets), list.Len()) + } + + attachField := resultField.child.fieldByName(elementName) + if attachField == nil { + return fmt.Errorf("resolve result field %s not found", elementName) + } + + for i := 0; i < list.Len(); i++ { + value, err := attachField.materialize(builder, frame, list.Get(i).Message()) + if err != nil { + return err + } + frame.setObjectField(targets[i], elementName, value) + } + + return nil +} + +func (p *v2ResponseProgram) resolveAttachTargets(frame *v2ResponseFrameBuilder, root int, path ast.Path) ([]int, error) { + if path.Len() == 0 { + return []int{root}, nil + } + + next, ok := frame.getObjectField(root, path[0].FieldName.String()) + if !ok { + return nil, fmt.Errorf("response path %s not found", path.String()) + } + return frame.flatten(next, path[1:]) +} + +func (p *v2ResponseProgram) fieldByName(name string) *v2ResponseFieldProgram { + for i := range p.fields { + if p.fields[i].name == name { + return &p.fields[i] + } + } + return nil +} + +func (f *v2ResponseFieldProgram) materialize(builder *jsonBuilder, frame *v2ResponseFrameBuilder, data protoref.Message) (int, error) { + if f.staticValue != "" { + return frame.newString(f.staticValue), nil + } + + fd := f.runtime.descriptorFor(data) + if fd.IsList() { + arr := frame.newArray() + list := data.Get(fd).List() + if !list.IsValid() { + return arr, nil + } + + 
for i := 0; i < list.Len(); i++ { + if f.child != nil { + childValue, err := f.child.objectValue(builder, frame, list.Get(i).Message()) + if err != nil { + return 0, err + } + frame.appendArrayItem(arr, childValue) + continue + } + + value, err := scalarFrameValue(builder, frame, list.Get(i), fd) + if err != nil { + return 0, err + } + frame.appendArrayItem(arr, value) + } + + return arr, nil + } + + if f.child != nil { + msg := data.Get(fd).Message() + if !msg.IsValid() { + return frame.newNull(), nil + } + return f.child.objectValue(builder, frame, msg) + } + + return scalarFrameValue(builder, frame, data.Get(fd), fd) +} + +func (p *v2ResponseProgram) objectValue(builder *jsonBuilder, frame *v2ResponseFrameBuilder, data protoref.Message) (int, error) { + if data == nil || !data.IsValid() { + return frame.newNull(), nil + } + if p.oneOfType != OneOfTypeNone { + return p.oneOfObjectValue(builder, frame, data) + } + + root := frame.newObject() + if err := p.applyObject(builder, frame, root, data); err != nil { + return 0, err + } + return root, nil +} + +func (p *v2ResponseProgram) oneOfObjectValue(builder *jsonBuilder, frame *v2ResponseFrameBuilder, data protoref.Message) (int, error) { + root := frame.newObject() + if err := p.applyObject(builder, frame, root, data); err != nil { + return 0, err + } + + oneofDesc := data.Descriptor().Oneofs().ByName(protoref.Name(p.oneOfType.FieldName())) + if oneofDesc == nil { + return 0, fmt.Errorf("oneof %s not found on message %s", p.oneOfType.FieldName(), data.Descriptor().FullName()) + } + + activeField := data.WhichOneof(oneofDesc) + if activeField == nil { + return root, nil + } + + activeMessage := data.Get(activeField).Message() + if !activeMessage.IsValid() { + return root, nil + } + + fragmentProgram, ok := p.fragments[string(activeField.Message().Name())] + if !ok { + return root, nil + } + if err := fragmentProgram.applyObject(builder, frame, root, activeMessage); err != nil { + return 0, err + } + + return root, 
nil +} + +func (p *v2ResponseProgram) write(builder *jsonBuilder, frame *v2ResponseFrameBuilder, data protoref.Message) (int, error) { + return p.objectValue(builder, frame, data) +} + +func scalarFrameValue(builder *jsonBuilder, frame *v2ResponseFrameBuilder, data protoref.Value, fd protoref.FieldDescriptor) (int, error) { + if !data.IsValid() { + return frame.newNull(), nil + } + + switch fd.Kind() { + case protoref.BoolKind: + if data.Bool() { + return frame.newBool(true), nil + } + return frame.newBool(false), nil + case protoref.StringKind: + return frame.newString(data.String()), nil + case protoref.Int32Kind: + return frame.newNumber(strconv.FormatInt(data.Int(), 10)), nil + case protoref.Int64Kind: + return frame.newNumber(strconv.FormatInt(data.Int(), 10)), nil + case protoref.Uint32Kind, protoref.Uint64Kind: + return frame.newNumber(strconv.FormatUint(data.Uint(), 10)), nil + case protoref.FloatKind, protoref.DoubleKind: + return frame.newNumber(strconv.FormatFloat(data.Float(), 'g', -1, 64)), nil + case protoref.BytesKind: + return frame.newString(string(data.Bytes())), nil + case protoref.EnumKind: + enumDesc := fd.Enum() + enumValueDesc := enumDesc.Values().ByNumber(data.Enum()) + if enumValueDesc == nil { + return frame.newNull(), nil + } + graphqlValue, ok := builder.mapping.FindEnumValueMapping(string(enumDesc.Name()), string(enumValueDesc.Name())) + if !ok { + return frame.newNull(), nil + } + return frame.newString(graphqlValue), nil + default: + return frame.newNull(), fmt.Errorf("unsupported scalar kind %s", fd.Kind()) + } +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go new file mode 100644 index 0000000000..4a3c1f8abb --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_schema.go @@ -0,0 +1,138 @@ +package grpcdatasource + +import ( + "fmt" + + "buf.build/go/hyperpb" + protoref 
"google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/reflect/protoregistry" + "google.golang.org/protobuf/types/dynamicpb" +) + +type v2SchemaRuntime struct { + messageByName map[string]*v2MessageRuntime + messageByFullName map[string]*v2MessageRuntime + methodByName map[string]*Method + serviceNamesByMethod map[string]string +} + +type v2MessageRuntime struct { + name string + desc protoref.MessageDescriptor + generatedDesc protoref.MessageDescriptor + dynamicType protoref.MessageType + generatedType protoref.MessageType + hyperType *hyperpb.MessageType + fieldsByName map[string]*v2FieldRuntime +} + +type v2FieldRuntime struct { + name string + owner *v2MessageRuntime + desc protoref.FieldDescriptor + genDesc protoref.FieldDescriptor + dataType DataType + message *v2MessageRuntime + repeated bool + optional bool + isMessage bool +} + +func newV2SchemaRuntime(compiler *RPCCompiler) (*v2SchemaRuntime, error) { + runtime := &v2SchemaRuntime{ + messageByName: make(map[string]*v2MessageRuntime, len(compiler.doc.Messages)), + messageByFullName: make(map[string]*v2MessageRuntime, len(compiler.doc.Messages)), + methodByName: make(map[string]*Method, len(compiler.doc.Methods)), + serviceNamesByMethod: make(map[string]string, len(compiler.doc.Methods)), + } + + for i := range compiler.doc.Messages { + message := compiler.doc.Messages[i] + generatedType, _ := protoregistry.GlobalTypes.FindMessageByName(message.Desc.FullName()) + v2Message := &v2MessageRuntime{ + name: message.Name, + desc: message.Desc, + dynamicType: dynamicpb.NewMessageType(message.Desc), + generatedType: generatedType, + hyperType: hyperpb.CompileMessageDescriptor(message.Desc), + fieldsByName: make(map[string]*v2FieldRuntime, message.Desc.Fields().Len()), + } + if generatedType != nil { + v2Message.generatedDesc = generatedType.Descriptor() + } + runtime.messageByName[message.Name] = v2Message + runtime.messageByFullName[string(message.Desc.FullName())] = v2Message + } + + for 
_, message := range runtime.messageByName { + for i := 0; i < message.desc.Fields().Len(); i++ { + fd := message.desc.Fields().Get(i) + field := &v2FieldRuntime{ + owner: message, + name: string(fd.Name()), + desc: fd, + dataType: parseDataType(fd.Kind()), + repeated: fd.IsList(), + optional: fd.HasOptionalKeyword(), + isMessage: fd.Kind() == protoref.MessageKind, + } + if message.generatedDesc != nil { + field.genDesc = message.generatedDesc.Fields().ByName(fd.Name()) + } + if field.isMessage { + child, ok := runtime.messageByFullName[string(fd.Message().FullName())] + if !ok { + return nil, fmt.Errorf("message runtime not found for %s", fd.Message().FullName()) + } + field.message = child + } + message.fieldsByName[field.name] = field + } + } + + for i := range compiler.doc.Methods { + method := &compiler.doc.Methods[i] + runtime.methodByName[method.Name] = method + } + + for i := range compiler.doc.Services { + service := &compiler.doc.Services[i] + for _, methodRef := range service.MethodsRefs { + if methodRef < 0 || methodRef >= len(compiler.doc.Methods) { + return nil, fmt.Errorf("invalid method ref %d for service %s", methodRef, service.Name) + } + runtime.serviceNamesByMethod[compiler.doc.Methods[methodRef].Name] = service.FullName + } + } + + return runtime, nil +} + +func (r *v2SchemaRuntime) messageRuntime(name string) (*v2MessageRuntime, bool) { + msg, ok := r.messageByName[name] + return msg, ok +} + +func (m *v2MessageRuntime) newMessage() protoref.Message { + if m.generatedType != nil { + return m.generatedType.New() + } + return m.dynamicType.New() +} + +func (m *v2MessageRuntime) newDecodeMessage(shared *hyperpb.Shared) protoref.Message { + if m.generatedType != nil { + return m.generatedType.New() + } + if m.hyperType != nil && shared != nil { + return shared.NewMessage(m.hyperType) + } + return m.dynamicType.New() +} + +func (f *v2FieldRuntime) descriptorFor(message protoref.Message) protoref.FieldDescriptor { + if f.genDesc != nil && f.owner != 
nil && f.owner.generatedType != nil && message.Type() == f.owner.generatedType { + return f.genDesc + } + return f.desc +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go new file mode 100644 index 0000000000..4ce8257ecd --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_test.go @@ -0,0 +1,603 @@ +package grpcdatasource + +import ( + "context" + "testing" + + "buf.build/go/hyperpb" + "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" + "google.golang.org/protobuf/proto" + protoref "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/dynamicpb" + + "github.com/wundergraph/go-arena" + + "github.com/wundergraph/graphql-go-tools/v2/pkg/astparser" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" + "github.com/wundergraph/graphql-go-tools/v2/pkg/grpctest" +) + +func TestV2ResponseFrameBuilder_MarshalDataEnvelope(t *testing.T) { + builder := newV2ResponseFrameBuilder() + root := builder.newObject() + category := builder.newObject() + metrics := builder.newArray() + + builder.setObjectField(root, "categories", metrics) + builder.appendArrayItem(metrics, category) + builder.setObjectField(category, "id", builder.newString("cat-1")) + builder.setObjectField(category, "name", builder.newString("Category One")) + builder.setObjectField(category, "score", builder.newNumber("42")) + builder.setObjectField(category, "active", builder.newBool(true)) + + data := builder.marshalDataEnvelope(root) + require.JSONEq(t, `{"data":{"categories":[{"id":"cat-1","name":"Category One","score":42,"active":true}]}}`, string(data)) +} + +func TestNewDataSourceV2_CompilesNativeProgramForSimpleQuery(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query 
ComplexFilterTypeQuery($filter: ComplexFilterTypeInput!) { complexFilterType(filter: $filter) { id name } }` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + require.NotNil(t, ds.program) + require.True(t, ds.program.nativeOperation) + require.Len(t, ds.program.stages, 1) + require.Len(t, ds.program.stages[0].fetches, 1) +} + +func TestDataSourceV2_Load_NativeMatchesV1(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query ComplexFilterTypeQuery($filter: ComplexFilterTypeInput!) { complexFilterType(filter: $filter) { id name } }` + variables := `{"variables":{"filter":{"filter":{"name":"test","filterField1":"test","filterField2":"test"}}}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + v1, err := NewDataSource(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + + v2, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + require.True(t, v2.program.nativeOperation) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + v1Data, err := v1.Load(context.Background(), nil, input) + require.NoError(t, err) + + v2Data, err := 
v2.Load(context.Background(), nil, input) + require.NoError(t, err) + + require.JSONEq(t, string(v1Data), string(v2Data)) +} + +func TestDataSourceV2_Load_ResolveMatchesV1(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query CategoriesWithNullableTypes($nullType: String, $valueType: String) { categories { nullMetrics: categoryMetrics(metricType: $nullType) { id metricType value } valueMetrics: categoryMetrics(metricType: $valueType) { id metricType value } } }` + variables := `{"variables":{"nullType":"unavailable","valueType":"popularity_score"}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + v1, err := NewDataSource(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + + v2, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + require.True(t, v2.program.nativeOperation) + require.False(t, v2.program.requiresFallback) + require.Len(t, v2.program.stages, 2) + require.Len(t, v2.program.stages[0].fetches, 1) + require.Len(t, v2.program.stages[1].fetches, 2) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + v1Data, err := v1.Load(context.Background(), nil, input) + require.NoError(t, err) + + v2Data, err := v2.Load(context.Background(), nil, input) + require.NoError(t, err) + + require.JSONEq(t, string(v1Data), string(v2Data)) +} + +func TestDataSourceV2_LoadValue_ResolveMatchesLoad(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + 
query := `query CategoriesWithNullableTypes($nullType: String, $valueType: String) { categories { nullMetrics: categoryMetrics(metricType: $nullType) { id metricType value } valueMetrics: categoryMetrics(metricType: $valueType) { id metricType value } } }` + variables := `{"variables":{"nullType":"unavailable","valueType":"popularity_score"}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + byteData, err := ds.Load(context.Background(), nil, input) + require.NoError(t, err) + + value, release, err := ds.LoadValue(context.Background(), nil, input) + require.NoError(t, err) + require.NotNil(t, value) + require.NotNil(t, release) + defer release() + + nativeData := value.MarshalTo(nil) + require.JSONEq(t, string(byteData), string(nativeData)) +} + +func TestDataSourceV2_LoadResult_ResolveMatchesLoadAndLoadValue(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query CategoriesWithNullableTypes($nullType: String, $valueType: String) { categories { nullMetrics: categoryMetrics(metricType: $nullType) { id metricType value } valueMetrics: categoryMetrics(metricType: $valueType) { id metricType value } } }` + variables := `{"variables":{"nullType":"unavailable","valueType":"popularity_score"}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, 
DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + + mergeDataSource, ok := any(ds).(resolve.NativeMergeDataSource) + require.True(t, ok) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + byteData, err := ds.Load(context.Background(), nil, input) + require.NoError(t, err) + + value, releaseValue, err := ds.LoadValue(context.Background(), nil, input) + require.NoError(t, err) + require.NotNil(t, value) + require.NotNil(t, releaseValue) + defer releaseValue() + + result, releaseResult, err := mergeDataSource.LoadResult(context.Background(), nil, input) + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, releaseResult) + defer releaseResult() + + require.JSONEq(t, string(byteData), string(result.MarshalTo(nil))) + + mergeArena := arena.NewMonotonicArena() + merged, err := result.MergeInto(mergeArena, nil, resolve.PostProcessingConfiguration{SelectResponseDataPath: []string{"data"}}, nil) + require.NoError(t, err) + require.JSONEq(t, gjson.GetBytes(byteData, "data").Raw, string(merged.MarshalTo(nil))) + require.JSONEq(t, string(value.Get("data").MarshalTo(nil)), string(merged.MarshalTo(nil))) +} + +func TestDataSourceV2_LoadValue_FederationFanoutMatchesLoad(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query($representations: [_Any!]!, $input: ShippingEstimateInput!) 
{ _entities(representations: $representations) { ...on Product { id name price shippingEstimate(input: $input) } } }` + variables := `{"variables":{"representations":[{"__typename":"Product","id":"1"},{"__typename":"Product","id":"2"},{"__typename":"Product","id":"3"}],"input":{"destination":"INTERNATIONAL","weight":10.0,"expedited":true}}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: plan.FederationFieldConfigurations{ + { + TypeName: "Product", + SelectionSet: "id", + }, + }, + }) + require.NoError(t, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + byteData, err := ds.Load(context.Background(), nil, input) + require.NoError(t, err) + + value, release, err := ds.LoadValue(context.Background(), nil, input) + require.NoError(t, err) + require.NotNil(t, value) + require.NotNil(t, release) + defer release() + + nativeData := value.MarshalTo(nil) + require.JSONEq(t, string(byteData), string(nativeData)) +} + +func TestV2NativeMergeResult_MergeInto_SupportsIndexedSelectPath(t *testing.T) { + frame := newV2ResponseFrameBuilder() + root := frame.newObject() + entities := frame.newArray() + product := frame.newObject() + frame.setObjectField(root, "_entities", entities) + frame.appendArrayItem(entities, product) + frame.setObjectField(product, "id", frame.newString("1")) + frame.setObjectField(product, "name", frame.newString("Table")) + + result := &v2NativeMergeResult{frame: frame, root: root} + mergeArena := arena.NewMonotonicArena() + merged, err := result.MergeInto(mergeArena, nil, resolve.PostProcessingConfiguration{SelectResponseDataPath: []string{"data", "_entities", 
"0"}}, nil) + require.NoError(t, err) + require.NotNil(t, merged) + require.JSONEq(t, `{"id":"1","name":"Table"}`, string(merged.MarshalTo(nil))) +} + +func TestDataSourceV2_CompilesNativeProgramForFederationFanout(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query($representations: [_Any!]!, $input: ShippingEstimateInput!) { _entities(representations: $representations) { ...on Product { id name price shippingEstimate(input: $input) } } }` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: plan.FederationFieldConfigurations{ + { + TypeName: "Product", + SelectionSet: "id", + }, + }, + }) + require.NoError(t, err) + require.Truef(t, ds.program.nativeOperation, "fallback reasons: %v", ds.program.fallbackReasons) + require.Falsef(t, ds.program.requiresFallback, "fallback reasons: %v", ds.program.fallbackReasons) + require.Len(t, ds.program.stages, 2) + require.Len(t, ds.program.stages[0].fetches, 1) + require.Len(t, ds.program.stages[1].fetches, 1) + require.Equal(t, CallKindEntity, ds.program.stages[0].fetches[0].kind) + require.Equal(t, CallKindResolve, ds.program.stages[1].fetches[0].kind) +} + +func TestDataSourceV2_CompilesNativeProgramForFederationRequiresAndUnionResolve(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query($representations: [_Any!]!, $checkHealth: Boolean!) { _entities(representations: $representations) { ...on Storage { __typename id tagSummary storageStatus(checkHealth: $checkHealth) { ... 
on ActionSuccess { message timestamp } ... on ActionError { message code } } } } }` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: plan.FederationFieldConfigurations{ + { + TypeName: "Storage", + SelectionSet: "id", + }, + { + TypeName: "Storage", + FieldName: "tagSummary", + SelectionSet: "tags", + }, + }, + }) + require.NoError(t, err) + require.Truef(t, ds.program.nativeOperation, "fallback reasons: %v", ds.program.fallbackReasons) + require.Falsef(t, ds.program.requiresFallback, "fallback reasons: %v", ds.program.fallbackReasons) +} + +func TestDataSourceV2_LoadValue_FederationRequiresAndUnionResolveMatchesLoad(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query($representations: [_Any!]!, $checkHealth: Boolean!) { _entities(representations: $representations) { ...on Storage { __typename id tagSummary storageStatus(checkHealth: $checkHealth) { ... on ActionSuccess { message timestamp } ... 
on ActionError { message code } } } } }` + variables := `{"variables":{"representations":[{"__typename":"Storage","id":"1","tags":["electronics","gadgets","sale"]},{"__typename":"Storage","id":"2","tags":["books","fiction"]},{"__typename":"Storage","id":"3","tags":[]}],"checkHealth":true}}` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + FederationConfigs: plan.FederationFieldConfigurations{ + { + TypeName: "Storage", + SelectionSet: "id", + }, + { + TypeName: "Storage", + FieldName: "tagSummary", + SelectionSet: "tags", + }, + }, + }) + require.NoError(t, err) + + input := []byte(`{"query":"` + query + `","body":` + variables + `}`) + byteData, err := ds.Load(context.Background(), nil, input) + require.NoError(t, err) + + value, release, err := ds.LoadValue(context.Background(), nil, input) + require.NoError(t, err) + require.NotNil(t, value) + require.NotNil(t, release) + defer release() + + nativeData := value.MarshalTo(nil) + require.JSONEq(t, string(byteData), string(nativeData)) +} + +func TestDataSourceV2_SchemaRuntimeTracksDynamicAndGeneratedHandles(t *testing.T) { + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + runtime, err := newV2SchemaRuntime(compiler) + require.NoError(t, err) + + categoryRuntime := runtime.messageByName["Category"] + require.NotNil(t, categoryRuntime) + require.NotNil(t, categoryRuntime.dynamicType) + require.NotNil(t, categoryRuntime.desc) + + complexFilterRuntime := runtime.messageByName["QueryComplexFilterTypeRequest"] + require.NotNil(t, complexFilterRuntime) + require.NotNil(t, complexFilterRuntime.dynamicType) + + 
require.NotEmpty(t, runtime.methodByName) + require.NotEmpty(t, runtime.serviceNamesByMethod) +} + +func TestDataSourceV2_NativeProgramBuildsRequestFromVariables(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query ComplexFilterTypeQuery($filter: ComplexFilterTypeInput!) { complexFilterType(filter: $filter) { id name } }` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + + variables := gjson.Parse(`{"filter":{"filter":{"name":"test","filterField1":"test","filterField2":"test"}}}`) + req, err := ds.program.stages[0].fetches[0].request.build(variables, ds.schema, ds.fallback.rc) + require.NoError(t, err) + require.Equal(t, "QueryComplexFilterTypeRequest", string(req.Descriptor().Name())) + filterField := req.Descriptor().Fields().ByName("filter") + require.NotNil(t, filterField) + require.True(t, req.Has(filterField)) +} + +func TestDataSourceV2_ResolveProgramBuildsContextRequestFromDependencyOutput(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query CategoriesWithNullableTypes($nullType: String, $valueType: String) { categories { nullMetrics: categoryMetrics(metricType: $nullType) { id metricType value } valueMetrics: categoryMetrics(metricType: $valueType) { id metricType value } } }` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := 
NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + require.True(t, ds.program.nativeOperation) + + var resolveFetch *v2Fetch + for i := range ds.program.stages[1].fetches { + fetch := &ds.program.stages[1].fetches[i] + if fetch.kind == CallKindResolve { + resolveFetch = fetch + break + } + } + require.NotNil(t, resolveFetch) + + dependencyRuntime := ds.schema.messageByName["QueryCategoriesResponse"] + categoryRuntime := ds.schema.messageByName["Category"] + require.NotNil(t, dependencyRuntime) + require.NotNil(t, categoryRuntime) + + dependencyOutput := dependencyRuntime.newMessage() + categoriesField := dependencyOutput.Descriptor().Fields().ByName("categories") + require.NotNil(t, categoriesField) + categories := dependencyOutput.Mutable(categoriesField).List() + + appendCategory := func(id, name string) { + category := categoryRuntime.newMessage() + category.Set(category.Descriptor().Fields().ByName("id"), protoref.ValueOfString(id)) + category.Set(category.Descriptor().Fields().ByName("name"), protoref.ValueOfString(name)) + categories.Append(protoref.ValueOfMessage(category)) + } + appendCategory("cat-1", "Category One") + appendCategory("cat-2", "Category Two") + + variables := gjson.Parse(`{"nullType":"unavailable","valueType":"popularity_score"}`) + req, skip, err := resolveFetch.request.buildWithDependency(variables, dependencyOutput, ds.schema, ds.fallback.rc) + require.NoError(t, err) + require.False(t, skip) + require.Equal(t, "ResolveCategoryCategoryMetricsRequest", string(req.Descriptor().Name())) + + contextField := req.Descriptor().Fields().ByName("context") + require.NotNil(t, contextField) + contextList := req.Get(contextField).List() + require.Equal(t, 2, contextList.Len()) + require.Equal(t, "cat-1", 
contextList.Get(0).Message().Get(contextList.Get(0).Message().Descriptor().Fields().ByName("id")).String()) + require.Equal(t, "Category One", contextList.Get(0).Message().Get(contextList.Get(0).Message().Descriptor().Fields().ByName("name")).String()) + + fieldArgs := req.Descriptor().Fields().ByName("field_args") + require.NotNil(t, fieldArgs) + fieldArgsMessage := req.Get(fieldArgs).Message() + require.Equal(t, "unavailable", fieldArgsMessage.Get(fieldArgsMessage.Descriptor().Fields().ByName("metric_type")).String()) +} + +func TestV2MessageRuntime_NewDecodeMessage_UsesHyperpbWhenGeneratedTypeMissing(t *testing.T) { + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + runtime, err := newV2SchemaRuntime(compiler) + require.NoError(t, err) + + messageRuntime := runtime.messageByName["QueryCategoriesResponse"] + require.NotNil(t, messageRuntime) + require.NotNil(t, messageRuntime.generatedType) + require.NotNil(t, messageRuntime.hyperType) + + messageRuntime.generatedType = nil + messageRuntime.generatedDesc = nil + + shared := new(hyperpb.Shared) + msg := messageRuntime.newDecodeMessage(shared) + require.NotNil(t, msg) + _, ok := msg.(*hyperpb.Message) + require.True(t, ok) + shared.Free() +} + +func TestV2RequestProgram_BuildInput_UsesWirePlanForNestedRequest(t *testing.T) { + conn, cleanup := setupTestGRPCServer(t) + t.Cleanup(cleanup) + + schemaDoc := grpctest.MustGraphQLSchema(t) + query := `query ComplexFilterTypeQuery($filter: ComplexFilterTypeInput!) 
{ complexFilterType(filter: $filter) { id name } }` + queryDoc, report := astparser.ParseGraphqlDocumentString(query) + require.False(t, report.HasErrors()) + + compiler, err := NewProtoCompiler(grpctest.MustProtoSchema(t), testMapping()) + require.NoError(t, err) + + ds, err := NewDataSourceV2(conn, DataSourceConfig{ + Operation: &queryDoc, + Definition: &schemaDoc, + SubgraphName: "Products", + Compiler: compiler, + Mapping: testMapping(), + }) + require.NoError(t, err) + + variables := gjson.Parse(`{"filter":{"filter":{"name":"test","filterField1":"test","filterField2":"test"}}}`) + input, err := ds.program.stages[0].fetches[0].request.buildInput(variables, ds.schema, ds.fallback.rc) + require.NoError(t, err) + + wire, ok := input.(*v2PreMarshaledInput) + require.True(t, ok) + require.NotEmpty(t, wire.wire) + + inputMessage, ok := ds.fallback.rc.doc.MessageByName("QueryComplexFilterTypeRequest") + require.True(t, ok) + decoded := dynamicpb.NewMessage(inputMessage.Desc) + require.NoError(t, proto.Unmarshal(wire.wire, decoded)) + + filterField := decoded.Descriptor().Fields().ByName("filter") + require.NotNil(t, filterField) + require.True(t, decoded.Has(filterField)) + complexFilterMessage := decoded.Get(filterField).Message() + nestedFilterField := complexFilterMessage.Descriptor().Fields().ByName("filter") + require.NotNil(t, nestedFilterField) + nestedFilterMessage := complexFilterMessage.Get(nestedFilterField).Message() + require.Equal(t, "test", nestedFilterMessage.Get(nestedFilterMessage.Descriptor().Fields().ByName("name")).String()) + require.Equal(t, "test", nestedFilterMessage.Get(nestedFilterMessage.Descriptor().Fields().ByName("filter_field_1")).String()) + require.Equal(t, "test", nestedFilterMessage.Get(nestedFilterMessage.Descriptor().Fields().ByName("filter_field_2")).String()) +} diff --git a/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_wire.go b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_wire.go new file mode 
100644 index 0000000000..e4847e1e6d --- /dev/null +++ b/v2/pkg/engine/datasource/grpc_datasource/grpc_datasource_v2_wire.go @@ -0,0 +1,171 @@ +package grpcdatasource + +import ( + "encoding/binary" + "fmt" + "math" + + "github.com/tidwall/gjson" + "google.golang.org/protobuf/encoding/protowire" +) + +type v2PreMarshaledInput struct { + wire []byte +} + +type v2WirePlan struct { + fields []v2WireField +} + +type v2WireField struct { + tag []byte + number protowire.Number + dataType DataType + jsonPath string + staticValue string + optional bool + repeated bool + child *v2WirePlan +} + +func compileV2WirePlan(program *v2RequestProgram) (*v2WirePlan, bool) { + if program == nil || program.context != nil { + return nil, false + } + + plan := &v2WirePlan{ + fields: make([]v2WireField, 0, len(program.fields)), + } + + for i := range program.fields { + field := &program.fields[i] + if field.enumName != "" { + return nil, false + } + + wireField := v2WireField{ + number: field.runtime.desc.Number(), + dataType: field.runtime.dataType, + jsonPath: field.jsonPath, + staticValue: field.staticValue, + optional: field.optional, + repeated: field.repeated, + } + + if field.child != nil { + child, ok := compileV2WirePlan(field.child) + if !ok { + return nil, false + } + wireField.child = child + } + + wireType := v2WireType(field.runtime.dataType, field.child != nil) + wireField.tag = protowire.AppendTag(nil, wireField.number, wireType) + plan.fields = append(plan.fields, wireField) + } + + return plan, true +} + +func (p *v2WirePlan) execute(buf []byte, data gjson.Result) ([]byte, error) { + for i := range p.fields { + var err error + buf, err = p.fields[i].appendWire(buf, data) + if err != nil { + return nil, err + } + } + return buf, nil +} + +func (f *v2WireField) appendWire(buf []byte, data gjson.Result) ([]byte, error) { + fieldData := data + if f.staticValue != "" { + fieldData = gjson.Parse(f.staticValue) + } else if f.jsonPath != "" { + fieldData = data.Get(f.jsonPath) + 
} + + if isNullValue(fieldData) { + if f.optional { + return buf, nil + } + return nil, fmt.Errorf("field %s is required but has no value", f.jsonPath) + } + + if f.repeated { + for _, element := range fieldData.Array() { + var err error + buf, err = f.appendSingle(buf, element) + if err != nil { + return nil, err + } + } + return buf, nil + } + + return f.appendSingle(buf, fieldData) +} + +func (f *v2WireField) appendSingle(buf []byte, data gjson.Result) ([]byte, error) { + if f.child != nil { + body, err := f.child.execute(nil, data) + if err != nil { + return nil, err + } + buf = append(buf, f.tag...) + buf = protowire.AppendVarint(buf, uint64(len(body))) + buf = append(buf, body...) + return buf, nil + } + + switch f.dataType { + case DataTypeString, DataTypeBytes: + value := data.String() + buf = append(buf, f.tag...) + buf = protowire.AppendVarint(buf, uint64(len(value))) + buf = append(buf, value...) + case DataTypeBool: + buf = append(buf, f.tag...) + if data.Bool() { + buf = protowire.AppendVarint(buf, 1) + } else { + buf = protowire.AppendVarint(buf, 0) + } + case DataTypeInt32, DataTypeInt64, DataTypeUint32, DataTypeUint64, DataTypeEnum: + buf = append(buf, f.tag...) + buf = protowire.AppendVarint(buf, uint64(data.Int())) + case DataTypeDouble: + buf = append(buf, f.tag...) + var bits [8]byte + binary.LittleEndian.PutUint64(bits[:], math.Float64bits(data.Float())) + buf = append(buf, bits[:]...) + case DataTypeFloat: + buf = append(buf, f.tag...) + var bits [4]byte + binary.LittleEndian.PutUint32(bits[:], math.Float32bits(float32(data.Float()))) + buf = append(buf, bits[:]...) 
+ default: + return nil, fmt.Errorf("unsupported wire data type %s", f.dataType) + } + + return buf, nil +} + +func v2WireType(dataType DataType, isMessage bool) protowire.Type { + if isMessage { + return protowire.BytesType + } + + switch dataType { + case DataTypeString, DataTypeBytes: + return protowire.BytesType + case DataTypeDouble: + return protowire.Fixed64Type + case DataTypeFloat: + return protowire.Fixed32Type + default: + return protowire.VarintType + } +} diff --git a/v2/pkg/engine/resolve/datasource.go b/v2/pkg/engine/resolve/datasource.go index b03bdd0781..8610936c01 100644 --- a/v2/pkg/engine/resolve/datasource.go +++ b/v2/pkg/engine/resolve/datasource.go @@ -4,6 +4,9 @@ import ( "context" "net/http" + "github.com/wundergraph/astjson" + "github.com/wundergraph/go-arena" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/datasource/httpclient" ) @@ -12,6 +15,34 @@ type DataSource interface { LoadWithFiles(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (data []byte, err error) } +// NativeDataSource is an optional extension for datasources that can return an +// arena-rooted JSON value directly instead of serialized bytes. +// +// The returned value remains valid until cleanup is called. The loader must call +// cleanup exactly once after it has finished reading the value. +type NativeDataSource interface { + LoadValue(ctx context.Context, headers http.Header, input []byte) (value *astjson.Value, cleanup func(), err error) + LoadWithFilesValue(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (value *astjson.Value, cleanup func(), err error) +} + +// NativeMergeResult is an optional result shape for datasources that can merge +// native results into the resolver's arena directly, without first materializing +// an intermediate astjson response tree per subgraph fetch. 
+type NativeMergeResult interface { + MergeInto(a arena.Arena, items []*astjson.Value, postProcessing PostProcessingConfiguration, batchStats [][]*astjson.Value) (root *astjson.Value, err error) + MarshalTo(dst []byte) []byte +} + +// NativeMergeDataSource is an optional extension for datasources that can +// return a mergeable native result object directly. +// +// Returning a nil result tells the loader to continue with the next available +// datasource contract, e.g. NativeDataSource or the byte contract. +type NativeMergeDataSource interface { + LoadResult(ctx context.Context, headers http.Header, input []byte) (result NativeMergeResult, cleanup func(), err error) + LoadWithFilesResult(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (result NativeMergeResult, cleanup func(), err error) +} + type SubscriptionDataSource interface { // Start is called when a new subscription is created. It establishes the connection to the data source. // The updater is used to send updates to the client. Deduplication of the request must be done before calling this method. diff --git a/v2/pkg/engine/resolve/loader.go b/v2/pkg/engine/resolve/loader.go index 0e6d7090db..800a9652e4 100644 --- a/v2/pkg/engine/resolve/loader.go +++ b/v2/pkg/engine/resolve/loader.go @@ -131,6 +131,9 @@ type result struct { httpResponseContext *httpclient.ResponseContext // out is the subgraph response body out []byte + outMerge NativeMergeResult + outValue *astjson.Value + outCleanup func() singleFlightStats *singleFlightStats tools *batchEntityTools } @@ -190,6 +193,8 @@ type Loader struct { // singleFlight is the SubgraphRequestSingleFlight object shared across all client requests. // It's thread safe and can be used to de-duplicate subgraph requests. 
singleFlight *SubgraphRequestSingleFlight + + pendingCleanups []func() } func (l *Loader) Free() { @@ -197,6 +202,10 @@ func (l *Loader) Free() { l.ctx = nil l.resolvable = nil l.taintedObjs = nil + for _, cleanup := range l.pendingCleanups { + cleanup() + } + l.pendingCleanups = l.pendingCleanups[:0] } func (l *Loader) LoadGraphQLResponseData(ctx *Context, response *GraphQLResponse, resolvable *Resolvable) (err error) { @@ -324,6 +333,9 @@ func (l *Loader) resolveSingle(item *FetchItem) error { func (l *Loader) callOnFinished(res *result) { if l.ctx.LoaderHooks != nil && res.loaderHookContext != nil { + if res.out == nil && res.outMerge != nil { + res.out = res.outMerge.MarshalTo(nil) + } l.ctx.LoaderHooks.OnFinished(res.loaderHookContext, res.ds, newResponseInfo(res, l.ctx.subgraphErrors)) } } @@ -485,21 +497,54 @@ func (l *Loader) mergeResult(fetchItem *FetchItem, res *result, items []*astjson if res.fetchSkipped { return nil } - if len(res.out) == 0 { - return l.renderErrorsFailedToFetch(fetchItem, res, emptyGraphQLResponse) + if res.outMerge != nil { + if res.outCleanup != nil { + l.pendingCleanups = append(l.pendingCleanups, res.outCleanup) + res.outCleanup = nil + } + responseData, err := res.outMerge.MergeInto(l.jsonArena, items, res.postProcessing, res.batchStats) + if err != nil { + return errors.WithStack(ErrMergeResult{ + Subgraph: res.ds.Name, + Reason: err, + Path: fetchItem.ResponsePath, + }) + } + if len(items) == 0 { + if responseData == nil || responseData.Type() != astjson.TypeObject { + return l.renderErrorsFailedToFetch(fetchItem, res, invalidGraphQLResponseShape) + } + l.resolvable.data = responseData + } + return nil } - // astjson.ParseBytesWithArena copies bytes onto the arena internally, - // tying the byte lifecycle to the arena and preventing GC-related segfaults. 
- response, err := astjson.ParseBytesWithArena(l.jsonArena, res.out) - if err != nil { - // Fall back to status code if parsing fails and non-2XX - if (res.statusCode > 0 && res.statusCode < 200) || res.statusCode >= 300 { - return l.renderErrorsStatusFallback(fetchItem, res, res.statusCode) + var response *astjson.Value + if res.outValue != nil { + response = res.outValue + if res.outCleanup != nil { + l.pendingCleanups = append(l.pendingCleanups, res.outCleanup) + res.outCleanup = nil + } + res.outValue = nil + } else { + if len(res.out) == 0 { + return l.renderErrorsFailedToFetch(fetchItem, res, emptyGraphQLResponse) + } + // astjson.ParseBytesWithArena copies bytes onto the arena internally, + // tying the byte lifecycle to the arena and preventing GC-related segfaults. + var err error + response, err = astjson.ParseBytesWithArena(l.jsonArena, res.out) + if err != nil { + // Fall back to status code if parsing fails and non-2XX + if (res.statusCode > 0 && res.statusCode < 200) || res.statusCode >= 300 { + return l.renderErrorsStatusFallback(fetchItem, res, res.statusCode) + } + return l.renderErrorsFailedToFetch(fetchItem, res, invalidGraphQLResponse) } - return l.renderErrorsFailedToFetch(fetchItem, res, invalidGraphQLResponse) } var responseData *astjson.Value + var err error if res.postProcessing.SelectResponseDataPath != nil { responseData = response.Get(res.postProcessing.SelectResponseDataPath...) 
} else { @@ -1759,6 +1804,38 @@ func (l *Loader) loadByContext(ctx context.Context, source DataSource, fetchItem } func (l *Loader) loadByContextDirect(ctx context.Context, source DataSource, headers http.Header, input []byte, res *result) error { + if native, ok := source.(NativeMergeDataSource); ok { + if l.ctx.Files != nil { + res.outMerge, res.outCleanup, res.err = native.LoadWithFilesResult(ctx, headers, input, l.ctx.Files) + } else { + res.outMerge, res.outCleanup, res.err = native.LoadResult(ctx, headers, input) + } + if res.err != nil { + if res.outCleanup != nil { + res.outCleanup() + res.outCleanup = nil + } + return errors.WithStack(res.err) + } + if res.outMerge != nil { + return nil + } + } + if native, ok := source.(NativeDataSource); ok { + if l.ctx.Files != nil { + res.outValue, res.outCleanup, res.err = native.LoadWithFilesValue(ctx, headers, input, l.ctx.Files) + } else { + res.outValue, res.outCleanup, res.err = native.LoadValue(ctx, headers, input) + } + if res.err != nil { + if res.outCleanup != nil { + res.outCleanup() + res.outCleanup = nil + } + return errors.WithStack(res.err) + } + return nil + } if l.ctx.Files != nil { res.out, res.err = source.LoadWithFiles(ctx, headers, input, l.ctx.Files) } else { diff --git a/v2/pkg/engine/resolve/resolve.go b/v2/pkg/engine/resolve/resolve.go index f735752ef9..a094bf6e93 100644 --- a/v2/pkg/engine/resolve/resolve.go +++ b/v2/pkg/engine/resolve/resolve.go @@ -332,6 +332,7 @@ func (r *Resolver) ResolveGraphQLResponse(ctx *Context, response *GraphQLRespons }() t := newTools(r.options, r.allowedErrorExtensionFields, r.allowedErrorFields, r.subgraphRequestSingleFlight, nil) + defer t.loader.Free() err := t.resolvable.Init(ctx, data, response.Info.OperationType) if err != nil { @@ -384,6 +385,7 @@ func (r *Resolver) ArenaResolveGraphQLResponse(ctx *Context, response *GraphQLRe resolveArena := r.resolveArenaPool.Acquire(ctx.Request.ID) // we're intentionally not using defer Release to have more control over 
the timing (see below)
	t := newTools(r.options, r.allowedErrorExtensionFields, r.allowedErrorFields, r.subgraphRequestSingleFlight, resolveArena.Arena)
	defer t.loader.Free()

	err = t.resolvable.Init(ctx, nil, response.Info.OperationType)
	if err != nil {
diff --git a/v2/pkg/engine/resolve/resolve_test.go b/v2/pkg/engine/resolve/resolve_test.go index 98568556ab..6f6d981654 100644 --- a/v2/pkg/engine/resolve/resolve_test.go +++ b/v2/pkg/engine/resolve/resolve_test.go @@ -18,6 +18,7 @@ import ( "github.com/stretchr/testify/require" "github.com/wundergraph/astjson" + "github.com/wundergraph/go-arena" "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/datasource/httpclient" @@ -69,6 +70,74 @@ func fakeDataSourceWithInputCheck(t TestingTB, input []byte, data []byte) *_fake } }

// nativeValueDataSource is a test double that implements both the byte
// contract (Load/LoadWithFiles) and NativeDataSource, with call counters so
// tests can assert which path the loader took.
type nativeValueDataSource struct {
	loadCalled        atomic.Int32
	loadValueCalled   atomic.Int32
	cleanupCalled     atomic.Int32
	loadWithFilesCall atomic.Int32
}

func (d *nativeValueDataSource) Load(ctx context.Context, headers http.Header, input []byte) ([]byte, error) {
	d.loadCalled.Add(1)
	return []byte(`{"errors":[{"message":"legacy path used"}]}`), nil
}

func (d *nativeValueDataSource) LoadWithFiles(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) ([]byte, error) {
	d.loadWithFilesCall.Add(1)
	return d.Load(ctx, headers, input)
}

func (d *nativeValueDataSource) LoadValue(ctx context.Context, headers http.Header, input []byte) (*astjson.Value, func(), error) {
	d.loadValueCalled.Add(1)
	payload := astjson.ObjectValue(nil)
	payload.Set(nil, "field", astjson.StringValue(nil, "value"))
	resp := astjson.ObjectValue(nil)
	resp.Set(nil, "data", payload)
	cleanup := func() {
		d.cleanupCalled.Add(1)
	}
	return resp, cleanup, nil
}

func (d *nativeValueDataSource) LoadWithFilesValue(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (*astjson.Value, func(), error) {
	return d.LoadValue(ctx, headers, input)
}

// nativeMergeResult is a minimal NativeMergeResult used by the merge-path tests.
type nativeMergeResult struct {
	root *astjson.Value
}

func (r *nativeMergeResult) MergeInto(a arena.Arena, items []*astjson.Value, postProcessing PostProcessingConfiguration, batchStats [][]*astjson.Value) (*astjson.Value, error) {
	data := astjson.ObjectValue(a)
	data.Set(a, "field", astjson.StringValue(a, "merge-value"))
	if len(items) == 0 {
		return data, nil
	}
	_, _, err := astjson.MergeValuesWithPath(a, items[0], data, postProcessing.MergePath...)
	return nil, err
}

func (r *nativeMergeResult) MarshalTo(dst []byte) []byte {
	return r.root.MarshalTo(dst)
}

// nativeMergeDataSource additionally implements NativeMergeDataSource, which
// the loader should prefer over both the value and byte contracts.
type nativeMergeDataSource struct {
	nativeValueDataSource

	loadResultCalled atomic.Int32
}

func (d *nativeMergeDataSource) LoadResult(ctx context.Context, headers http.Header, input []byte) (NativeMergeResult, func(), error) {
	d.loadResultCalled.Add(1)
	result := &nativeMergeResult{
		root: astjson.MustParseBytes([]byte(`{"data":{"field":"merge-value"}}`)),
	}
	return result, func() {
		d.cleanupCalled.Add(1)
	}, nil
}

func (d *nativeMergeDataSource) LoadWithFilesResult(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (NativeMergeResult, func(), error) {
	return d.LoadResult(ctx, headers, input)
}

type blockingDataSource struct { data []byte ready chan struct{} @@ -5009,6 +5078,74 @@ func TestResolver_ArenaResolveGraphQLResponse(t *testing.T) { })) }

func TestResolver_ArenaResolveGraphQLResponse_UsesNativeValueDataSourceAndCallsCleanup(t *testing.T) {
	source := &nativeValueDataSource{}
	response, _ := gcTestResponse(source)

	resolver := newResolver(context.Background())
	resolveCtx := NewContext(context.Background())
	out := &bytes.Buffer{}

	_, err := resolver.ArenaResolveGraphQLResponse(resolveCtx, response, out)
	require.NoError(t, err)
	assert.JSONEq(t, `{"data":{"field":"value"}}`, out.String())

	// Exactly one native value load, one cleanup; the legacy byte path is untouched.
	assert.Equal(t, int32(0), source.loadCalled.Load())
	assert.Equal(t, int32(0), source.loadWithFilesCall.Load())
	assert.Equal(t, int32(1), source.loadValueCalled.Load())
	assert.Equal(t, int32(1), source.cleanupCalled.Load())
}

func TestResolver_ArenaResolveGraphQLResponse_PrefersNativeMergeDataSourceAndCallsCleanup(t *testing.T) {
	source := &nativeMergeDataSource{}
	response, _ := gcTestResponse(source)

	resolver := newResolver(context.Background())
	resolveCtx := NewContext(context.Background())
	out := &bytes.Buffer{}

	_, err := resolver.ArenaResolveGraphQLResponse(resolveCtx, response, out)
	require.NoError(t, err)
	assert.JSONEq(t, `{"data":{"field":"merge-value"}}`, out.String())

	// The merge contract wins over both the value and byte contracts.
	assert.Equal(t, int32(0), source.loadCalled.Load())
	assert.Equal(t, int32(0), source.loadWithFilesCall.Load())
	assert.Equal(t, int32(0), source.loadValueCalled.Load())
	assert.Equal(t, int32(1), source.loadResultCalled.Load())
	assert.Equal(t, int32(1), source.cleanupCalled.Load())
}

func BenchmarkResolver_ArenaResolveGraphQLResponse_NativeBoundary(b *testing.B) {
	cases := []struct {
		name string
		ds   DataSource
	}{
		{name: "native_value", ds: &nativeValueDataSource{}},
		{name: "native_merge", ds: &nativeMergeDataSource{}},
	}

	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			response, _ := gcTestResponse(tc.ds)
			resolver := newResolver(context.Background())
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				resolveCtx := NewContext(context.Background())
				out := &bytes.Buffer{}
				_, err := resolver.ArenaResolveGraphQLResponse(resolveCtx, response, out)
				if err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}

func TestResolver_ArenaResolveGraphQLResponse_RequestDeduplication(t *testing.T) {
	rCtx, cancel := context.WithCancel(context.Background())
	defer cancel()