NVIDIA
diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp
Lines changed: 1 addition & 2 deletions
diff --git a/runtime/common/CompiledModule.cpp b/runtime/common/CompiledModule.cpp
Lines changed: 40 additions & 83 deletions
diff --git a/runtime/common/CompiledModule.h b/runtime/common/CompiledModule.h
Lines changed: 56 additions & 64 deletions
diff --git a/runtime/cudaq/platform/qpu.cpp b/runtime/cudaq/platform/qpu.cpp
Lines changed: 40 additions & 1 deletion
@@ -1143,8 +1143,7 @@ void cudaq::bindAltLaunchKernel(nanobind::module_ &mod,
       .def_prop_ro(
           "entry_point",
           [](const cudaq::CompiledModule &ck) {
-            return reinterpret_cast<std::uintptr_t>(
-                ck.getJit().getEntryPoint());
+            // `getJit()` returns an optional. `value()` throws when the
+            // module has no JIT artifact, whereas dereferencing the empty
+            // optional with `->` would be undefined behavior.
+            return reinterpret_cast<std::uintptr_t>(
+                ck.getJit().value().getFn());
           },
           "The address of the JIT-compiled entry point.")
       .def_prop_ro("is_fully_specialized",
 
@@ -7,47 +7,58 @@
  ******************************************************************************/
 
 #include "CompiledModule.h"
-#include <cstring>
-#include <memory>
 #include <stdexcept>
 
 cudaq::CompiledModule::CompiledModule(std::string kernelName)
     : name(std::move(kernelName)) {}
 
-const cudaq::CompiledModule::JitArtifact &
-cudaq::CompiledModule::getJit() const {
-  for (auto &[key, artifact] : artifacts)
-    if (auto *jit = std::get_if<JitArtifact>(&artifact))
-      return *jit;
-  throw std::runtime_error("CompiledModule has no JIT artifact.");
+/// Look up the JIT artifact stored under `jitName` (default: the kernel name).
+///
+/// Returns `std::nullopt` when no artifact has that name, or when the artifact
+/// stored under it is not a `JitArtifact`.
+std::optional<cudaq::CompiledModule::JitArtifact>
+cudaq::CompiledModule::getJit(std::optional<std::string> jitName) const {
+  // Bind the lookup key by reference: this avoids copying the string and
+  // avoids a local that shadows the `name` member.
+  const std::string &key = jitName ? *jitName : name;
+  const auto it = artifacts.find(key);
+  if (it == artifacts.end())
+    return std::nullopt;
+  if (const auto *jit = std::get_if<JitArtifact>(&it->second))
+    return *jit;
+  return std::nullopt;
 }
 
-const cudaq::CompiledModule::MlirArtifact &
-cudaq::CompiledModule::getMlir() const {
-  for (auto &[key, artifact] : artifacts)
-    if (auto *mlir = std::get_if<MlirArtifact>(&artifact))
-      return *mlir;
-  throw std::runtime_error("CompiledModule has no MLIR artifact.");
+/// Look up the MLIR artifact stored under `mlirName` (default: the kernel name
+/// followed by ".mlir").
+///
+/// Returns `std::nullopt` when no artifact has that name, or when the artifact
+/// stored under it is not an `MlirArtifact`.
+std::optional<cudaq::CompiledModule::MlirArtifact>
+cudaq::CompiledModule::getMlir(std::optional<std::string> mlirName) const {
+  // Build the default key only when the caller did not supply one, and move
+  // the supplied name instead of copying it. Calling the local `key` also
+  // keeps the `name` member unshadowed.
+  const std::string key =
+      mlirName ? std::move(*mlirName) : name + ".mlir";
+  const auto it = artifacts.find(key);
+  if (it == artifacts.end())
+    return std::nullopt;
+  if (const auto *mlir = std::get_if<MlirArtifact>(&it->second))
+    return *mlir;
+  return std::nullopt;
 }
 
-bool cudaq::CompiledModule::hasJit() const {
-  for (auto &[key, artifact] : artifacts)
-    if (std::holds_alternative<JitArtifact>(artifact))
-      return true;
-  return false;
+/// A kernel counts as fully specialized exactly when `getArgsCreator()` finds
+/// no argument-marshaling function for it.
+bool cudaq::CompiledModule::isFullySpecialized() const {
+  const auto argsCreator = getArgsCreator();
+  return !argsCreator;
 }
 
-bool cudaq::CompiledModule::hasMlir() const {
-  for (auto &[key, artifact] : artifacts)
-    if (std::holds_alternative<MlirArtifact>(artifact))
-      return true;
-  return false;
+/// Fetch the argument-marshaling function, which is stored as the JIT artifact
+/// named after the kernel with an ".argsCreator" suffix.
+///
+/// Returns `nullptr` when no such JIT artifact exists.
+int64_t (*cudaq::CompiledModule::getArgsCreator() const)(const void *,
+                                                         void **) {
+  using ArgsCreatorFn = int64_t (*)(const void *, void **);
+  const auto artifact = getJit(name + ".argsCreator");
+  if (!artifact)
+    return nullptr;
+  // The artifact stores a type-erased `void (*)()`; restore the real signature.
+  return reinterpret_cast<ArgsCreatorFn>(artifact->fn);
 }
 
-bool cudaq::CompiledModule::isFullySpecialized() const {
-  if (!hasJit())
-    return true; // No JIT artifact → fully specialized.
-  return getJit().argsCreator == nullptr;
+/// Evaluate the JIT-compiled function stored under the kernel name with a
+/// ".returnOffset" suffix and return its value.
+///
+/// Returns `std::nullopt` when no such JIT artifact exists or when the
+/// artifact holds a null function pointer.
+std::optional<std::int64_t> cudaq::CompiledModule::getReturnOffset() const {
+  const auto jit = getJit(name + ".returnOffset");
+  // `JitArtifact::fn` defaults to nullptr; calling through it would be
+  // undefined behavior, so treat it like a missing artifact.
+  if (!jit || !jit->fn)
+    return std::nullopt;
+  const auto offsetFn = reinterpret_cast<std::int64_t (*)()>(jit->fn);
+  return offsetFn();
+}
+
+/// Look up the resource metrics stored under `resourcesName` (default: the
+/// kernel name followed by ".resources").
+///
+/// Returns `nullptr` when no artifact has that name, or when the artifact
+/// stored under it is not a `ResourcesArtifact`. The returned pointer refers
+/// to data held inside this module's artifact map.
+const cudaq::Resources *cudaq::CompiledModule::getResources(
+    std::optional<std::string> resourcesName) const {
+  // Build the default key only when the caller did not supply one, and move
+  // the supplied name instead of copying it. Calling the local `key` also
+  // keeps the `name` member unshadowed.
+  const std::string key =
+      resourcesName ? std::move(*resourcesName) : name + ".resources";
+  const auto it = artifacts.find(key);
+  if (it == artifacts.end())
+    return nullptr;
+  const auto *resources = std::get_if<ResourcesArtifact>(&it->second);
+  return resources ? &resources->getResources() : nullptr;
 }
 
 void cudaq::CompiledModule::addArtifact(std::string name,
@@ -57,62 +68,8 @@ void cudaq::CompiledModule::addArtifact(std::string name,
   artifacts.emplace(std::move(name), std::move(artifact));
 }
 
-cudaq::KernelThunkResultType
-cudaq::CompiledModule::execute(const std::vector<void *> &rawArgs) const {
-  auto &jit = getJit();
-  auto funcPtr = jit.entryPoint;
-  if (!isFullySpecialized()) {
-    // Pack args at runtime via argsCreator, then call the thunk.
-    void *buff = nullptr;
-    jit.argsCreator(static_cast<const void *>(rawArgs.data()), &buff);
-    reinterpret_cast<KernelThunkResultType (*)(void *, bool)>(funcPtr)(
-        buff, /*client_server=*/false);
-    // If the kernel has a result, copy it from the packed buffer into
-    // rawArgs.back() (where the caller expects to find it).
-    if (resultInfo.hasResult()) {
-      auto offset = jit.returnOffset();
-      std::memcpy(rawArgs.back(), static_cast<char *>(buff) + offset,
-                  resultInfo.bufferSize);
-    }
-    std::free(buff);
-    return {nullptr, 0};
-  }
-  if (resultInfo.hasResult()) {
-    // Fully specialized with result: rawArgs.back() is the pre-allocated
-    // result buffer; pass it directly to the thunk.
-    void *buff = const_cast<void *>(rawArgs.back());
-    return reinterpret_cast<KernelThunkResultType (*)(void *, bool)>(funcPtr)(
-        buff, /*client_server=*/false);
-  }
-  // Fully specialized, no result.
-  jit.entryPoint();
-  return {nullptr, 0};
-}
-
-cudaq::KernelThunkResultType cudaq::CompiledModule::execute() const {
-  if (!isFullySpecialized())
-    throw std::runtime_error(
-        "Kernel has unspecialized parameters; call execute(rawArgs) instead.");
-  if (!resultInfo.hasResult()) {
-    getJit().entryPoint();
-    return {nullptr, 0};
-  }
-  // Allocate a result buffer on-the-fly.
-  auto buf = std::make_unique<char[]>(resultInfo.bufferSize);
-  std::vector<void *> rawArgs = {buf.get()};
-  execute(rawArgs);
-  return {buf.release(), resultInfo.bufferSize};
-}
-
-void (*cudaq::CompiledModule::JitArtifact::getEntryPoint() const)() {
-  return entryPoint;
-}
+void (*cudaq::CompiledModule::JitArtifact::getFn() const)() { return fn; }
 
 cudaq::JitEngine cudaq::CompiledModule::JitArtifact::getEngine() const {
   return engine;
 }
-
-std::optional<cudaq::Resources>
-cudaq::CompiledModule::JitArtifact::getResourceCounts() const {
-  return resourceCounts;
-}
@@ -87,13 +87,15 @@ class ResultInfo {
 public:
   /// Whether this kernel has a result that must be marshaled.
   bool hasResult() const { return typeOpaquePtr != nullptr; }
+  /// Get the size (in bytes) of the buffer needed to hold the result value.
+  std::size_t getBufferSize() const { return bufferSize; }
 };
 
 /// @brief A compiled MLIR module, ready for execution or code generation.
 ///
 /// Contains any number of named compilation artifacts (we currently support
-/// JIT binaries and optimized MLIR modules) that result from the compilation
-/// of a Quake MLIR module.
+/// JIT binaries, optimized MLIR modules, and pre-computed resource metrics)
+/// that result from the compilation of a Quake MLIR module.
 ///
 /// This type does not depend on MLIR/LLVM — it only keeps type-erased / opaque
 /// pointers. Build instances with
@@ -105,46 +107,18 @@ class CompiledModule {
   /// JIT-compiled artifact, ready for local execution.
   class JitArtifact {
     JitEngine engine;
-    void (*entryPoint)() = nullptr;
-    std::int64_t (*argsCreator)(const void *, void **) = nullptr;
-    /// Offset (in bytes) of the result field within the argsCreator-packed
-    /// buffer. Only valid when argsCreator is non-null and the kernel has a
-    /// result. Use resultInfo.bufferSize to know how many bytes to copy.
-    std::int64_t (*returnOffset)() = nullptr;
-    std::optional<Resources> resourceCounts;
-
-    JitArtifact(JitEngine engine, void (*entryPoint)(),
-                int64_t (*argsCreator)(const void *, void **),
-                int64_t (*returnOffset)(),
-                std::optional<Resources> resourceCounts)
-        : engine(engine), entryPoint(entryPoint), argsCreator(argsCreator),
-          returnOffset(returnOffset),
-          resourceCounts(std::move(resourceCounts)) {}
+    void (*fn)() = nullptr;
+
+    JitArtifact(JitEngine engine, void (*fn)())
+        : engine(std::move(engine)), fn(fn) {}
 
     friend class CompiledModule;
     friend class cudaq_internal::compiler::CompiledModuleHelper;
 
   public:
-    // TODO: remove the following two methods once the `CompiledModule` instance
-    // is returned to Python.
-
-    /// @brief Get the entry point of the kernel as a function pointer.
-    ///
-    /// Assumes that there is (exactly one) compiled JIT artifact.
-    ///
-    /// The returned function pointer will expect different arguments depending
-    /// on the kernel:
-    ///  - if the kernel returns a value and/or is not fully specialized, the
-    ///    entry point will expect a pointer to a buffer storing the packed
-    ///    arguments and result.
-    ///  - otherwise, the entry point will not expect any arguments.
-    ///
-    /// Prefer using `CompiledModule::execute` instead of calling this function
-    /// as it will handle the buffer and argument packing automatically.
-    void (*getEntryPoint() const)();
+    /// Get the raw function pointer stored in this artifact.
+    void (*getFn() const)();
     JitEngine getEngine() const;
-
-    std::optional<Resources> getResourceCounts() const;
   };
 
   /// Optimized MLIR module artifact, for deferred code generation or
@@ -165,8 +139,22 @@ class CompiledModule {
     friend class cudaq_internal::compiler::CompiledModuleHelper;
   };
 
-  /// A compiled artifact is either a JIT binary or an MLIR module.
-  using CompiledArtifact = std::variant<JitArtifact, MlirArtifact>;
+  /// Resource metrics (gate counts, depth) computed ahead of time by IR
+  /// analysis.
+  ///
+  /// The constructor is private: only the friend classes listed below can
+  /// create instances. Other code reads the metrics via `getResources()`.
+  class ResourcesArtifact {
+    friend class CompiledModule;
+    friend class cudaq_internal::compiler::CompiledModuleHelper;
+
+    Resources resources;
+
+    ResourcesArtifact(Resources metrics) : resources(std::move(metrics)) {}
+
+  public:
+    /// Read-only access to the stored metrics.
+    const Resources &getResources() const { return resources; }
+  };
+
+  /// A compiled artifact is a JIT binary, an MLIR module, or resource metrics.
+  using CompiledArtifact =
+      std::variant<JitArtifact, MlirArtifact, ResourcesArtifact>;
 
   // --- Compilation metadata ---
 
@@ -178,47 +166,51 @@ class CompiledModule {
 
   // --- Queries ---
 
-  /// Whether any artifact in the map is a JitArtifact.
-  bool hasJit() const;
-
-  /// Whether any artifact in the map is an MlirArtifact.
-  bool hasMlir() const;
+  /// Get the JIT artifact with the given name.
+  ///
+  /// If no name is provided, defaults to the kernel name. Returns
+  /// `std::nullopt` if no artifact has that name or it is not a JIT artifact.
+  std::optional<JitArtifact>
+  getJit(std::optional<std::string> jitName = std::nullopt) const;
 
-  /// Get the compiled JIT artifact. Returns the first one found.
+  /// Get the MLIR artifact with the given name.
   ///
-  /// Throws if none exists.
-  const JitArtifact &getJit() const;
+  /// If no name is provided, defaults to `kernel_name + ".mlir"`. Returns
+  /// `std::nullopt` if no artifact has that name or is not an MLIR artifact.
+  std::optional<MlirArtifact>
+  getMlir(std::optional<std::string> mlirName = std::nullopt) const;
 
-  /// Get the optimized MLIR artifact. Returns the first one found.
+  /// Get the pre-computed resource counts, or `nullptr` if it does not exist.
   ///
-  /// Throws if none exists.
-  const MlirArtifact &getMlir() const;
+  /// If no name is provided, defaults to `kernel_name + ".resources"`.
+  const Resources *
+  getResources(std::optional<std::string> resourcesName = std::nullopt) const;
 
   /// Get all compiled artifacts.
   const std::map<std::string, CompiledArtifact> &getArtifacts() const {
     return artifacts;
   }
 
-  /// Whether the kernel is fully specialized (all arguments inlined). For JIT
-  /// kernels this means `argsCreator` is null.
-  /// Kernels without a JIT artifact are considered fully specialized.
+  /// Whether the kernel is fully specialized (all arguments inlined).
+  ///
+  /// Currently, kernels are considered fully specialized if and only if they do
+  /// not have an `argsCreator` artifact.
   bool isFullySpecialized() const;
 
-  const std::string &getName() const { return name; }
-  const ResultInfo &getResultInfo() const { return resultInfo; }
-  const CompilationMetadata &getMetadata() const { return metadata; }
-
-  // --- Execution (local JIT path) ---
-
-  /// @brief Execute a fully specialized kernel (no external arguments needed).
+  /// Get the argument-marshaling function, or `nullptr` if it does not exist.
   ///
-  /// Assumes that there is (exactly one) compiled JIT artifact.
-  KernelThunkResultType execute() const;
+  /// Assumes the artifact is named `kernelName + ".argsCreator"`.
+  int64_t (*getArgsCreator() const)(const void *, void **);
 
-  /// @brief Execute the JIT-ed kernel with caller-provided arguments.
+  /// Get the offset (in bytes) of the result field within the
+  /// `argsCreator`-packed buffer, evaluating the stored JIT function.
+  /// Returns `std::nullopt` if no `.returnOffset` artifact was emitted
+  /// (e.g. the kernel has no result or is fully specialized).
   ///
-  /// Assumes that there is (exactly one) compiled JIT artifact.
-  KernelThunkResultType execute(const std::vector<void *> &rawArgs) const;
+  /// Assumes the artifact is named `kernelName + ".returnOffset"`.
+  std::optional<std::int64_t> getReturnOffset() const;
+
+  const std::string &getName() const { return name; }
+  const ResultInfo &getResultInfo() const { return resultInfo; }
+  const CompilationMetadata &getMetadata() const { return metadata; }
 
 private:
   friend class cudaq_internal::compiler::CompiledModuleHelper;
 
@@ -8,11 +8,50 @@
 
 #include "qpu.h"
 #include "mlir/IR/BuiltinOps.h"
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <stdexcept>
 
 using namespace cudaq_internal::compiler;
 
 LLVM_INSTANTIATE_REGISTRY(cudaq::ModuleLauncher::RegistryType)
 
+/// Execute a JIT-compiled kernel with the provided arguments.
+///
+/// If the module is not fully specialized, the arguments are packed with its
+/// `argsCreator`, the entry point is called as a thunk on the packed buffer,
+/// and the result (if any) is copied from that buffer into `rawArgs.back()`.
+/// Otherwise the entry point is called directly: as a thunk on
+/// `rawArgs.back()` when the kernel has a result, or without arguments when it
+/// does not.
+///
+/// @param compiled Compiled module to run.
+/// @param rawArgs Type-erased argument pointers; when the kernel has a result,
+///        the last entry is used as the result buffer.
+/// @return The thunk's return value for a fully specialized kernel with a
+///         result, `{nullptr, 0}` in every other case.
+/// @throws std::runtime_error if the module has no usable JIT entry point, or
+///         if a result is expected but `rawArgs` is empty.
+/// @throws std::bad_optional_access if a packed kernel has a result but the
+///         module provides no return offset.
+cudaq::KernelThunkResultType
+launchCompiledModule(const cudaq::CompiledModule &compiled,
+                     const std::vector<void *> &rawArgs) {
+  using ThunkFn = cudaq::KernelThunkResultType (*)(void *, bool);
+  // Releases the `argsCreator`-packed buffer on every exit path, including
+  // when the thunk or the return-offset lookup throws.
+  struct FreeDeleter {
+    void operator()(void *ptr) const { std::free(ptr); }
+  };
+
+  // `getJit()` yields an empty optional when the artifact is missing;
+  // dereferencing it unchecked would be undefined behavior.
+  const auto jit = compiled.getJit();
+  if (!jit || !jit->getFn())
+    throw std::runtime_error("CompiledModule has no JIT artifact.");
+  const auto funcPtr = jit->getFn();
+
+  const auto &resultInfo = compiled.getResultInfo();
+  const bool hasResult = resultInfo.hasResult();
+  // The result is always delivered through the last raw argument.
+  if (hasResult && rawArgs.empty())
+    throw std::runtime_error(
+        "Kernel returns a result but `rawArgs` holds no result buffer.");
+
+  if (!compiled.isFullySpecialized()) {
+    // Pack args at runtime via argsCreator, then call the thunk.
+    const auto argsCreator = compiled.getArgsCreator();
+    void *packed = nullptr;
+    argsCreator(static_cast<const void *>(rawArgs.data()), &packed);
+    const std::unique_ptr<void, FreeDeleter> buff(packed);
+    reinterpret_cast<ThunkFn>(funcPtr)(buff.get(), /*client_server=*/false);
+    // If the kernel has a result, copy it from the packed buffer into
+    // rawArgs.back() (where the caller expects to find it).
+    if (hasResult) {
+      const auto offset = compiled.getReturnOffset().value();
+      std::memcpy(rawArgs.back(), static_cast<char *>(buff.get()) + offset,
+                  resultInfo.getBufferSize());
+    }
+    return {nullptr, 0};
+  }
+  if (hasResult) {
+    // Fully specialized with result: rawArgs.back() is the pre-allocated
+    // result buffer; pass it directly to the thunk.
+    return reinterpret_cast<ThunkFn>(funcPtr)(rawArgs.back(),
+                                              /*client_server=*/false);
+  }
+  // Fully specialized, no result.
+  funcPtr();
+  return {nullptr, 0};
+}
+
 cudaq::KernelThunkResultType
 cudaq::QPU::launchModule(const std::string &name, mlir::ModuleOp module,
                          const std::vector<void *> &rawArgs) {
@@ -23,7 +62,7 @@ cudaq::QPU::launchModule(const std::string &name, mlir::ModuleOp module,
         "result of attempting to use `launchModule` outside Python.");
   ScopedTraceWithContext(cudaq::TIMING_LAUNCH, "QPU::launchModule", name);
   auto compiled = launcher->compileModule(name, module, rawArgs, true);
-  return compiled.execute(rawArgs);
+  return launchCompiledModule(compiled, rawArgs);
 }
 
 cudaq::CompiledModule