From 5d5548db88f3e78f35159cb1ac24e0a06713ec51 Mon Sep 17 00:00:00 2001 From: Luca Mondada Date: Fri, 5 Jun 2026 19:12:44 +0200 Subject: [PATCH 1/2] Remove MLIR header deps, remove AnyModule Signed-off-by: Luca Mondada --- python/runtime/interop/CMakeLists.txt | 3 +- runtime/common/AnalogRemoteRESTQPU.h | 9 +- runtime/common/BaseRemoteRESTQPU.h | 44 --------- runtime/common/CompiledModule.h | 56 ++++++------ runtime/common/ExecutionContext.h | 2 +- runtime/cudaq/CMakeLists.txt | 2 - runtime/cudaq/algorithms/launch.h | 3 +- runtime/cudaq/algorithms/sample/policy.h | 2 +- runtime/cudaq/platform/default/DefaultQPU.cpp | 24 +++-- runtime/cudaq/platform/default/DefaultQPU.h | 11 +-- .../platform/default/rest/RemoteRESTQPU.cpp | 89 ++++++++++--------- .../platform/default/rest/RemoteRESTQPU.h | 11 +-- .../cudaq/platform/fermioniq/FermioniqQPU.cpp | 64 ++++++------- .../cudaq/platform/fermioniq/FermioniqQPU.h | 12 +-- .../platform/mqpu/custatevec/GPUEmulatedQPU.h | 2 +- .../cudaq/platform/orca/OrcaRemoteRESTQPU.cpp | 5 +- .../cudaq/platform/orca/OrcaRemoteRESTQPU.h | 14 +-- runtime/cudaq/platform/qpu.cpp | 18 ++-- runtime/cudaq/platform/qpu.h | 12 +-- runtime/cudaq/platform/quantum_platform.cpp | 29 ++++-- runtime/cudaq/platform/quantum_platform.h | 2 +- runtime/internal/compiler/CMakeLists.txt | 19 ++-- .../compiler/CompiledModuleHelper.cpp | 14 +++ .../compiler/CompiledModuleHelper.h | 9 ++ .../common/ExecutionContextThreadTester.cpp | 2 +- 25 files changed, 205 insertions(+), 253 deletions(-) diff --git a/python/runtime/interop/CMakeLists.txt b/python/runtime/interop/CMakeLists.txt index 02135cd4980..6e1bb28a35b 100644 --- a/python/runtime/interop/CMakeLists.txt +++ b/python/runtime/interop/CMakeLists.txt @@ -15,8 +15,7 @@ target_include_directories(cudaq-python-interop PRIVATE ${Python3_INCLUDE_DIRS} ) target_link_libraries(cudaq-python-interop - PRIVATE nanobind-static Python3::Module cudaq - PUBLIC cudaq-mlir-runtime-headers) + PRIVATE nanobind-static Python3::Module cudaq) install (FILES PythonCppInterop.h PythonCppInteropDecls.h DESTINATION include/cudaq/python/) install(TARGETS cudaq-python-interop EXPORT cudaq-python-interop-targets DESTINATION lib) diff --git a/runtime/common/AnalogRemoteRESTQPU.h b/runtime/common/AnalogRemoteRESTQPU.h index ddf0becc303..1be6006945a 100644 --- a/runtime/common/AnalogRemoteRESTQPU.h +++ b/runtime/common/AnalogRemoteRESTQPU.h @@ -26,14 +26,9 @@ class AnalogRemoteRESTQPU : public BaseRemoteRESTQPU { /// @brief Launch a kernel with the given arguments /// Only analog Hamiltonian kernels are supported - KernelThunkResultType unifiedLaunchModule(const AnyModule &module, + KernelThunkResultType unifiedLaunchModule(const CompiledModule &module, KernelArgs args) override { - if (!std::holds_alternative(module)) - throw std::runtime_error( - "AnalogRemoteRESTQPU does not support pre-compiled module launch."); - - const auto &src = std::get(module); - const auto &kernelName = src.getName(); + const auto &kernelName = module.getName(); auto executionContext = cudaq::getExecutionContext(); if (!cudaq::detail::isAnalogHamiltonianKernel(kernelName)) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index c00cc025802..7dda81d649a 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -14,7 +14,6 @@ #include "common/KernelExecution.h" #include "common/Resources.h" #include "common/ServerHelper.h" -#include "cudaq_internal/compiler/Compiler.h" #include "nvqir/AnalysisScope.h" #include "nvqir/resourcecounter/ResourceCounterScope.h" #include "cudaq/Target/TargetConfig.h" @@ -56,8 +55,6 @@ inline observe_result observeResultFromCounts(const observe_policy &policy, class BaseRemoteRESTQPU : public QPU { protected: - using Compiler = cudaq_internal::compiler::Compiler; - /// The number of shots std::optional nShots; @@ -280,47 +277,6 @@ class BaseRemoteRESTQPU : public QPU { return target; } - /// @brief Build the list of kernel executions for the given module under - /// a specific sampling policy. Source modules are lowered through the - /// configured pass pipeline; pre-compiled modules are emitted directly. - /// The resolved kernel name is returned via @p kernelName. - template - std::pair> - compileKernelExecutions(Policy &policy, const AnyModule &module, - KernelArgs args) { - Compiler compiler(getCompileTarget(policy)); - std::string kernelName; - std::optional compiled; - if (std::holds_alternative(module)) { - const auto &src = std::get(module); - kernelName = src.getName(); - CUDAQ_INFO("launching remote rest kernel ({})", kernelName); - - auto [moduleOp, context] = Compiler::loadQuakeCodeByName(kernelName); - - compiled = compiler.runPassPipeline(kernelName, moduleOp, args, true, - std::move(context)); - if constexpr (std::is_same_v) { - if (compiler.hasWarnedNamedMeasurements()) - policy.warnedNamedMeasurements = true; - } - } else { - compiled = std::get(module); - kernelName = compiled->getName(); - CUDAQ_INFO("launching remote rest kernel via module ({})", kernelName); - } - - auto codes = compiler.emitKernelExecutions(*compiled); - - // Propagate metadata from the compiled artifact to the execution context. - if (auto ctx = getExecutionContext()) { - ctx->hasConditionalsOnMeasureResults = - compiled->getMetadata().hasConditionalsOnMeasureResults; - } - - return {kernelName, codes}; - } - void completeLaunchKernel(const std::string &kernelName, std::vector &&codes) { auto executionContext = cudaq::getExecutionContext(); diff --git a/runtime/common/CompiledModule.h b/runtime/common/CompiledModule.h index 9b4b9d11fb0..af366d4d78e 100644 --- a/runtime/common/CompiledModule.h +++ b/runtime/common/CompiledModule.h @@ -263,29 +263,6 @@ class FatQuakeModule { ArtifactsStore artifacts; }; -/// @brief A compiled MLIR module, ready for execution or code generation. -/// -/// Contains any number of named compilation artifacts (we currently support -/// JIT binaries, optimized MLIR modules, and pre-computed resource metrics) -/// that result from the compilation of a Quake MLIR module. -/// -/// This type does not depend on MLIR/LLVM — it only keeps type-erased / opaque -/// pointers. Build instances with -/// `cudaq_internal::compiler::CompiledModuleHelper`. -class CompiledModule : public FatQuakeModule { -public: - using CompilationMetadata = FatQuakeModule::CompilationMetadata; - -private: - friend class cudaq_internal::compiler::CompiledModuleHelper; - - CompiledModule(std::string kernelName) - : FatQuakeModule(std::move(kernelName)) {} - -public: - CompiledModule() : FatQuakeModule(std::string{}) {} -}; - /// Bundle of artifacts that define a CUDA-Q kernel to be compiled and executed. /// /// Contains either a `nvq++`-compiled function pointer or an MLIR module, @@ -309,10 +286,33 @@ class SourceModule : public FatQuakeModule { const void *getMlirOpaqueModulePtr() const; }; -// TODO: remove once C++ launch can be cleanly split into compilation + launch. -// Used by unifiedLaunchModule to compile kernels if they have not been compiled -// before. In the future, unifiedLaunchModule should only accept compiled -// modules. -using AnyModule = std::variant; +/// @brief A compiled MLIR module, ready for execution or code generation. +/// +/// Contains any number of named compilation artifacts (we currently support +/// JIT binaries, optimized MLIR modules, and pre-computed resource metrics) +/// that result from the compilation of a Quake MLIR module. +/// +/// This type does not depend on MLIR/LLVM — it only keeps type-erased / opaque +/// pointers. Build instances with +/// `cudaq_internal::compiler::CompiledModuleHelper`. +class CompiledModule : public FatQuakeModule { +public: + using CompilationMetadata = FatQuakeModule::CompilationMetadata; + +private: + friend class cudaq_internal::compiler::CompiledModuleHelper; + + CompiledModule(std::string kernelName) + : FatQuakeModule(std::move(kernelName)) {} + +public: + // The choice of constructors is intentionally limited to: + // - empty compiled modules for default construction + // - compiled module from a source module to explicitly bypass the compiler + // For any other use case, you should go through the factory methods in + // `CompiledModuleHelper`. + CompiledModule() : FatQuakeModule(std::string{}) {} + explicit CompiledModule(SourceModule src) : FatQuakeModule(std::move(src)) {} +}; } // namespace cudaq diff --git a/runtime/common/ExecutionContext.h b/runtime/common/ExecutionContext.h index d1f41164d01..baf5710ebbd 100644 --- a/runtime/common/ExecutionContext.h +++ b/runtime/common/ExecutionContext.h @@ -157,7 +157,7 @@ class ExecutionContext { std::optional cachedCompiledModule = std::nullopt; /// @brief Dispatcher towards the policy specific launch. - std::function + std::function executeKernelApi; /// @brief Slot for the detector error model, as `.dem` text. diff --git a/runtime/cudaq/CMakeLists.txt b/runtime/cudaq/CMakeLists.txt index 67b442e4f4c..082c4df6d81 100644 --- a/runtime/cudaq/CMakeLists.txt +++ b/runtime/cudaq/CMakeLists.txt @@ -66,7 +66,6 @@ if (CUDA_FOUND) cudaq-common cudaq-nlopt cudaq-ensmallen - cudaq-mlir-runtime-headers # Public until compilation is moved out of the QPUs cudaq-logger CUDAQTargetConfig PRIVATE @@ -94,7 +93,6 @@ else() cudaq-common cudaq-nlopt cudaq-ensmallen - cudaq-mlir-runtime-headers # Public until compilation is moved out of the QPUs cudaq-logger CUDAQTargetConfig PRIVATE diff --git a/runtime/cudaq/algorithms/launch.h b/runtime/cudaq/algorithms/launch.h index ada31657dc2..31ce59ec637 100644 --- a/runtime/cudaq/algorithms/launch.h +++ b/runtime/cudaq/algorithms/launch.h @@ -9,6 +9,7 @@ #pragma once +#include "common/CompiledModule.h" #include "common/ExecutionContext.h" #include "common/KernelArgs.h" #include "cudaq/platform.h" @@ -49,7 +50,7 @@ auto launch(Policy &policy, std::size_t qpu_id, ExecutionContext &ctx, typename Policy::result_type result; auto &qpu = platform.getQPU(qpu_id); - ctx.executeKernelApi = [&qpu, &result, &policy](const AnyModule &module, + ctx.executeKernelApi = [&qpu, &result, &policy](const CompiledModule &module, const KernelArgs &args) { result = qpu.launchKernel(policy, module, args); }; diff --git a/runtime/cudaq/algorithms/sample/policy.h b/runtime/cudaq/algorithms/sample/policy.h index 2f28e0086e3..b791e5c3163 100644 --- a/runtime/cudaq/algorithms/sample/policy.h +++ b/runtime/cudaq/algorithms/sample/policy.h @@ -37,7 +37,7 @@ struct sample_policy { /// @brief Flag to indicate that a warning about named measurement registers /// in sampling context has already been emitted. - bool warnedNamedMeasurements = false; + mutable bool warnedNamedMeasurements = false; /// @brief A vector containing information about how to reorder the global /// register after execution. Empty means no reordering. diff --git a/runtime/cudaq/platform/default/DefaultQPU.cpp b/runtime/cudaq/platform/default/DefaultQPU.cpp index e14ff483b1b..3cf1718ed2c 100644 --- a/runtime/cudaq/platform/default/DefaultQPU.cpp +++ b/runtime/cudaq/platform/default/DefaultQPU.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "DefaultQPU.h" +#include "common/CompiledModule.h" #include "common/ExecutionContext.h" #include "common/Timing.h" #include "cudaq/algorithms/policies.h" @@ -20,19 +21,14 @@ void cudaq::DefaultQPU::enqueue(QuantumTask &task) { } cudaq::KernelThunkResultType -cudaq::DefaultQPU::unifiedLaunchModule(const cudaq::AnyModule &module, +cudaq::DefaultQPU::unifiedLaunchModule(const cudaq::CompiledModule &module, cudaq::KernelArgs args) { - if (!std::holds_alternative(module)) - return runJITCompiledModule(std::get(module), args); - - const auto &src = std::get(module); ScopedTraceWithContext(cudaq::TIMING_LAUNCH, "QPU::unifiedLaunchModule"); - auto rawFn = src.getFunctionPtr(); + + auto rawFn = module.getFunctionPtr(); if (!rawFn) - throw std::runtime_error( - "DefaultQPU::unifiedLaunchModule requires a raw kernel function " - "pointer for kernel '" + - src.getName() + "'."); + return runJITCompiledModule(module, args); + auto packed = args.getPacked(); void *argData = packed ? packed->data.data() : nullptr; return rawFn->getFn()(argData, /*isRemote=*/false); @@ -40,7 +36,7 @@ cudaq::DefaultQPU::unifiedLaunchModule(const cudaq::AnyModule &module, cudaq::sample_result cudaq::DefaultQPU::launchKernel(const cudaq::sample_policy &policy, - const cudaq::AnyModule &module, + const cudaq::CompiledModule &module, cudaq::KernelArgs args) { CUDAQ_INFO("DefaultQPU::launchKernel {}", policy.name); return cudaq::ExecutionManager::with_default_em( @@ -50,7 +46,7 @@ cudaq::DefaultQPU::launchKernel(const cudaq::sample_policy &policy, cudaq::async_sample_result cudaq::DefaultQPU::launchKernel(const async_sample_policy &policy, - const cudaq::AnyModule &module, + const cudaq::CompiledModule &module, cudaq::KernelArgs args) { throw std::runtime_error( "DefaultQPU does not support launching the async_sample_policy."); @@ -58,7 +54,7 @@ cudaq::DefaultQPU::launchKernel(const async_sample_policy &policy, cudaq::observe_result cudaq::DefaultQPU::launchKernel(const cudaq::observe_policy &policy, - const cudaq::AnyModule &module, + const cudaq::CompiledModule &module, cudaq::KernelArgs args) { CUDAQ_INFO("DefaultQPU::launchKernel {}", policy.name); return cudaq::ExecutionManager::with_default_em( @@ -68,7 +64,7 @@ cudaq::DefaultQPU::launchKernel(const cudaq::observe_policy &policy, cudaq::async_observe_result cudaq::DefaultQPU::launchKernel(async_observe_policy &policy, - const cudaq::AnyModule &module, + const cudaq::CompiledModule &module, cudaq::KernelArgs args) { throw std::runtime_error( "DefaultQPU does not support launching the async_observe_policy."); diff --git a/runtime/cudaq/platform/default/DefaultQPU.h b/runtime/cudaq/platform/default/DefaultQPU.h index 4290cbfb320..795445e5f8a 100644 --- a/runtime/cudaq/platform/default/DefaultQPU.h +++ b/runtime/cudaq/platform/default/DefaultQPU.h @@ -21,22 +21,23 @@ class DefaultQPU : public QPU { void enqueue(QuantumTask &task) override; - KernelThunkResultType unifiedLaunchModule(const cudaq::AnyModule &module, + KernelThunkResultType unifiedLaunchModule(const cudaq::CompiledModule &module, cudaq::KernelArgs args) override; sample_result launchKernel(const sample_policy &policy, - const AnyModule &module, KernelArgs args) override; + const CompiledModule &module, + KernelArgs args) override; async_sample_result launchKernel(const async_sample_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; observe_result launchKernel(const observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; async_observe_result launchKernel(async_observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; std::unique_ptr diff --git a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp index c19cae79cea..6f8f9af5fb4 100644 --- a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp +++ b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp @@ -7,76 +7,79 @@ ******************************************************************************/ #include "RemoteRESTQPU.h" +#include "cudaq_internal/compiler/Compiler.h" using namespace cudaq; cudaq::RemoteRESTQPU::~RemoteRESTQPU() = default; +static std::vector +runCodegen(cudaq_internal::compiler::Compiler &compiler, + const CompiledModule &module, KernelArgs args) { + // TODO: This should be moved into compiler::compileModule, but this would add + // a dependency on the compiler in the C++ launch path. + auto compiled = module; + cudaq_internal::compiler::CompiledModuleHelper::ensureMlirArtifactsExist( + compiled, compiler, args); + + return compiler.emitKernelExecutions(compiled); +} + sample_result RemoteRESTQPU::launchKernel(const sample_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel {}", policy.name); - auto [kernelName, codes] = compileKernelExecutions(policy, module, args); - return completeLaunchKernel(policy, kernelName, std::move(codes)); + + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); + auto codes = runCodegen(compiler, module, args); + + if (compiler.hasWarnedNamedMeasurements()) + policy.warnedNamedMeasurements = true; + return completeLaunchKernel(policy, module.getName(), std::move(codes)); } async_sample_result RemoteRESTQPU::launchKernel(const async_sample_policy &policy, - const AnyModule &module, KernelArgs args) { + const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel async {}", policy.inner.name); - auto [kernelName, codes] = - compileKernelExecutions(policy.inner, module, args); - return completeLaunchKernel(policy, kernelName, std::move(codes)); + + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); + auto codes = runCodegen(compiler, module, args); + + if (compiler.hasWarnedNamedMeasurements()) + policy.inner.warnedNamedMeasurements = true; + return completeLaunchKernel(policy, module.getName(), std::move(codes)); } observe_result RemoteRESTQPU::launchKernel(const observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel {}", policy.name); - auto [kernelName, codes] = compileKernelExecutions(policy, module, args); - return completeLaunchKernel(policy, kernelName, std::move(codes)); + + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); + auto codes = runCodegen(compiler, module, args); + return completeLaunchKernel(policy, module.getName(), std::move(codes)); } async_observe_result RemoteRESTQPU::launchKernel(async_observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel async {}", policy.inner.name); - auto [kernelName, codes] = - compileKernelExecutions(policy.inner, module, args); - return completeLaunchKernel(policy, kernelName, std::move(codes)); + + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); + auto codes = runCodegen(compiler, module, args); + return completeLaunchKernel(policy, module.getName(), std::move(codes)); } KernelThunkResultType -RemoteRESTQPU::unifiedLaunchModule(const AnyModule &module, KernelArgs args) { - Compiler compiler(getCompileTarget(other_policies{}, getExecutionContext())); - - std::string kernelName; - std::optional compiled; - - if (std::holds_alternative(module)) { - const auto &src = std::get(module); - kernelName = src.getName(); - CUDAQ_INFO("launching remote rest kernel ({})", kernelName); - - auto [moduleOp, context] = Compiler::loadQuakeCodeByName(kernelName); - - // Get the Quake code, lowered according to config file. - compiled = compiler.runPassPipeline(kernelName, moduleOp, args, true, - std::move(context)); - } else { - compiled = std::get(module); - kernelName = compiled->getName(); - CUDAQ_INFO("launching remote rest kernel via module ({})", kernelName); - } - - auto codes = compiler.emitKernelExecutions(*compiled); +RemoteRESTQPU::unifiedLaunchModule(const CompiledModule &module, + KernelArgs args) { + CUDAQ_INFO("launching remote rest kernel ({})", module.getName()); - // Propagate metadata from the compiled artifact to the execution context. - if (auto ctx = getExecutionContext()) { - ctx->hasConditionalsOnMeasureResults = - compiled->getMetadata().hasConditionalsOnMeasureResults; - } + cudaq_internal::compiler::Compiler compiler( + getCompileTarget(other_policies{}, getExecutionContext())); + auto codes = runCodegen(compiler, module, args); - completeLaunchKernel(kernelName, std::move(codes)); + completeLaunchKernel(module.getName(), std::move(codes)); return {}; } diff --git a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h index bb700299030..95c7ea84da4 100644 --- a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h +++ b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h @@ -25,22 +25,23 @@ class RemoteRESTQPU : public BaseRemoteRESTQPU { /// representation required by the targeted backend. Handle all pertinent /// modifications for the execution context as well as asynchronous or /// synchronous invocation. - KernelThunkResultType unifiedLaunchModule(const AnyModule &module, + KernelThunkResultType unifiedLaunchModule(const CompiledModule &module, KernelArgs args) override; sample_result launchKernel(const sample_policy &policy, - const AnyModule &module, KernelArgs args) override; + const CompiledModule &module, + KernelArgs args) override; async_sample_result launchKernel(const async_sample_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; observe_result launchKernel(const observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; async_observe_result launchKernel(async_observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; }; diff --git a/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp b/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp index cde2e956e84..b443c5d05df 100644 --- a/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp +++ b/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp @@ -7,10 +7,9 @@ ******************************************************************************/ #include "FermioniqQPU.h" +#include "cudaq_internal/compiler/Compiler.h" #include "nlohmann/json.hpp" #include "cudaq/runtime/logger/cudaq_fmt.h" -#include -#include namespace { void attachFermioniqObservable(cudaq::KernelExecution &code, @@ -34,42 +33,27 @@ void attachFermioniqObservable(cudaq::KernelExecution &code, cudaq::FermioniqQPU::~FermioniqQPU() = default; -cudaq::sample_result -cudaq::FermioniqQPU::launchKernel(const cudaq::sample_policy &policy, - const AnyModule &module, KernelArgs args) { - auto [kernelName, codes] = compileKernelExecutions(policy, module, args); - CUDAQ_INFO("FermioniqBaseQPU launching kernel ({}) with policy {}", - kernelName, policy.name); - if (codes.size() != 1) - throw std::runtime_error("Provider only allows 1 circuit at a time."); - - return completeLaunchKernel(policy, kernelName, std::move(codes)); -} - -cudaq::async_sample_result -cudaq::FermioniqQPU::launchKernel(const cudaq::async_sample_policy &policy, - const AnyModule &module, KernelArgs args) { - auto [kernelName, codes] = - compileKernelExecutions(policy.inner, module, args); - CUDAQ_INFO("FermioniqBaseQPU launching kernel ({}) with policy {}", - kernelName, policy.inner.name); - if (codes.size() != 1) - throw std::runtime_error("Provider only allows 1 circuit at a time."); - - return completeLaunchKernel(policy, kernelName, std::move(codes)); -} - cudaq::observe_result cudaq::FermioniqQPU::launchKernel(const cudaq::observe_policy &policy, - const AnyModule &module, KernelArgs args) { - auto [kernelName, codes] = compileKernelExecutions(policy, module, args); + const CompiledModule &module, + KernelArgs args) { CUDAQ_INFO("FermioniqBaseQPU launching kernel ({}) with policy {}", - kernelName, policy.name); + module.getName(), policy.name); + + // TODO: This should be moved into compiler::compileModule, but this would add + // a dependency on the compiler in the C++ launch path. + auto compiled = module; + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); + cudaq_internal::compiler::CompiledModuleHelper::ensureMlirArtifactsExist( + compiled, compiler, args); + + auto codes = compiler.emitKernelExecutions(compiled); if (codes.size() != 1) throw std::runtime_error("Provider only allows 1 circuit at a time."); attachFermioniqObservable(codes[0], policy.spin); - auto result = completeLaunchKernel(policy, kernelName, std::move(codes)); + auto result = + completeLaunchKernel(policy, compiled.getName(), std::move(codes)); auto expectation = result.raw_data().expectation(GlobalRegisterName); return cudaq::observe_result(expectation, result.get_spin(), result.raw_data()); @@ -77,16 +61,24 @@ cudaq::FermioniqQPU::launchKernel(const cudaq::observe_policy &policy, cudaq::async_observe_result cudaq::FermioniqQPU::launchKernel(cudaq::async_observe_policy &policy, - const AnyModule &module, KernelArgs args) { - auto [kernelName, codes] = - compileKernelExecutions(policy.inner, module, args); + const CompiledModule &module, + KernelArgs args) { CUDAQ_INFO("FermioniqBaseQPU launching kernel ({}) with policy {}", - kernelName, policy.inner.name); + module.getName(), policy.inner.name); + + // TODO: This should be moved into compiler::compileModule, but this would add + // a dependency on the compiler in the C++ launch path. + auto compiled = module; + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); + cudaq_internal::compiler::CompiledModuleHelper::ensureMlirArtifactsExist( + compiled, compiler, args); + + auto codes = compiler.emitKernelExecutions(compiled); if (codes.size() != 1) throw std::runtime_error("Provider only allows 1 circuit at a time."); attachFermioniqObservable(codes[0], policy.inner.spin); - return completeLaunchKernel(policy, kernelName, std::move(codes)); + return completeLaunchKernel(policy, compiled.getName(), std::move(codes)); } CUDAQ_REGISTER_TYPE(cudaq::QPU, cudaq::FermioniqQPU, fermioniq) diff --git a/runtime/cudaq/platform/fermioniq/FermioniqQPU.h b/runtime/cudaq/platform/fermioniq/FermioniqQPU.h index 70eab9ef111..55762991585 100644 --- a/runtime/cudaq/platform/fermioniq/FermioniqQPU.h +++ b/runtime/cudaq/platform/fermioniq/FermioniqQPU.h @@ -43,19 +43,13 @@ class FermioniqQPU : public BaseRemoteRESTQPU { return target; } - sample_result launchKernel(const sample_policy &policy, - const AnyModule &module, KernelArgs args) override; - - async_sample_result launchKernel(const async_sample_policy &policy, - const AnyModule &module, - KernelArgs args) override; - + using BaseRemoteRESTQPU::launchKernel; observe_result launchKernel(const observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; async_observe_result launchKernel(async_observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) override; }; diff --git a/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h b/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h index 63dfa106ca8..eb8cbaa2d49 100644 --- a/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h +++ b/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h @@ -22,7 +22,7 @@ class GPUEmulatedQPU : public QPU { void enqueue(QuantumTask &task) override; - KernelThunkResultType unifiedLaunchModule(const cudaq::AnyModule &src, + KernelThunkResultType unifiedLaunchModule(const cudaq::CompiledModule &src, cudaq::KernelArgs args) override; void configureExecutionContext(ExecutionContext &context) const override; diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp index df3bc23de88..cf9368c631b 100644 --- a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp +++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp @@ -62,8 +62,9 @@ void cudaq::OrcaRemoteRESTQPU::setTargetBackend(const std::string &backend) { executor->setServerHelper(serverHelper.get()); } -KernelThunkResultType cudaq::OrcaRemoteRESTQPU::launchKernelCommon( - const std::string &kernelName, KernelThunkType kernelFunc, void *args) { +KernelThunkResultType +cudaq::OrcaRemoteRESTQPU::launchKernelCommon(const std::string &kernelName, + void *args) { CUDAQ_INFO("OrcaRemoteRESTQPU: Launch kernel named '{}' remote QPU {}", kernelName, qpu_id); diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h index f92d89fa2e8..de5953ee534 100644 --- a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h +++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h @@ -91,23 +91,15 @@ class OrcaRemoteRESTQPU : public cudaq::QPU { void setTargetBackend(const std::string &backend) override; [[nodiscard]] KernelThunkResultType - launchKernelCommon(const std::string &kernelName, KernelThunkType kernelFunc, - void *args); + launchKernelCommon(const std::string &kernelName, void *args); /// @brief Launch the kernel. Handle all pertinent modifications for the /// execution context. [[nodiscard]] KernelThunkResultType - unifiedLaunchModule(const AnyModule &module, KernelArgs args) override { - if (!std::holds_alternative(module)) - throw std::runtime_error( - "OrcaRemoteRESTQPU does not support pre-compiled module launch."); - - const auto &src = std::get(module); - auto rawFn = src.getFunctionPtr(); - KernelThunkType kernelFunc = rawFn ? rawFn->getFn() : nullptr; + unifiedLaunchModule(const CompiledModule &module, KernelArgs args) override { auto packed = args.getPacked(); void *argData = packed ? packed->data.data() : nullptr; - return launchKernelCommon(src.getName(), kernelFunc, argData); + return launchKernelCommon(module.getName(), argData); } }; } // namespace cudaq diff --git a/runtime/cudaq/platform/qpu.cpp b/runtime/cudaq/platform/qpu.cpp index c438ec30986..3ecd30ca8dc 100644 --- a/runtime/cudaq/platform/qpu.cpp +++ b/runtime/cudaq/platform/qpu.cpp @@ -25,39 +25,33 @@ using namespace cudaq_internal::compiler; using namespace cudaq; cudaq::KernelThunkResultType -cudaq::QPU::unifiedLaunchModule(const AnyModule &module, KernelArgs args) { - if (std::holds_alternative(module)) - throw std::runtime_error( - "This QPU does not support launching uncompiled SourceModule kernels; " - "subclasses must override unifiedLaunchModule."); - - const auto &compiled = std::get(module); - return runJITCompiledModule(compiled, args); +cudaq::QPU::unifiedLaunchModule(const CompiledModule &module, KernelArgs args) { + return runJITCompiledModule(module, args); } sample_result cudaq::QPU::launchKernel(const sample_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { throw std::runtime_error( "This QPU does not support launching the sample_policy."); } async_sample_result cudaq::QPU::launchKernel(const async_sample_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { throw std::runtime_error( "This QPU does not support launching the async_sample_policy."); } observe_result cudaq::QPU::launchKernel(const observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { throw std::runtime_error( "This QPU does not support launching the observe_policy."); } async_observe_result cudaq::QPU::launchKernel(async_observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args) { throw std::runtime_error( "This QPU does not support launching the async_observe_policy."); diff --git a/runtime/cudaq/platform/qpu.h b/runtime/cudaq/platform/qpu.h index 79175272b1e..a8a9fcb67d1 100644 --- a/runtime/cudaq/platform/qpu.h +++ b/runtime/cudaq/platform/qpu.h @@ -139,21 +139,23 @@ class QPU : public registry::RegisteredType { const std::size_t shots) {} virtual sample_result launchKernel(const sample_policy &policy, - const AnyModule &module, KernelArgs args); + const CompiledModule &module, + KernelArgs args); virtual async_sample_result launchKernel(const async_sample_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args); virtual observe_result launchKernel(const observe_policy &policy, - const AnyModule &module, KernelArgs args); + const CompiledModule &module, + KernelArgs args); virtual async_observe_result launchKernel(async_observe_policy &policy, - const AnyModule &module, + const CompiledModule &module, KernelArgs args); [[nodiscard]] virtual KernelThunkResultType - unifiedLaunchModule(const AnyModule &module, KernelArgs args); + unifiedLaunchModule(const CompiledModule &module, KernelArgs args); /// Get the compile target of the QPU for the given policy. [[nodiscard]] virtual std::unique_ptr diff --git a/runtime/cudaq/platform/quantum_platform.cpp b/runtime/cudaq/platform/quantum_platform.cpp index bd04812a2bc..a0f85a96261 100644 --- a/runtime/cudaq/platform/quantum_platform.cpp +++ b/runtime/cudaq/platform/quantum_platform.cpp @@ -274,8 +274,8 @@ quantum_platform::get_remote_capabilities(std::size_t qpu_id) const { } KernelThunkResultType -quantum_platform::unifiedLaunchModule(const AnyModule &module, KernelArgs args, - std::size_t qpu_id) { +quantum_platform::unifiedLaunchModule(const CompiledModule &module, + KernelArgs args, std::size_t qpu_id) { validateQpuId(qpu_id); auto &qpu = platformQPUs[qpu_id]; return qpu->unifiedLaunchModule(module, args); @@ -338,13 +338,16 @@ cudaq::altLaunchKernel(const char *kernelName, std::string kernName = kernelName; KernelArgs args{KernelArgs::PackedArgs{kernelArgs, argsSize, resultOffset}}; SourceModule src{kernName, kernelFunc}; + // TODO: we are bypassing the compiler to avoid a dependency on the compiler. + // This delays compilation until inside the QPU. + CompiledModule compiled{src}; auto ctx = cudaq::getExecutionContext(); if (ctx && ctx->executeKernelApi) { - ctx->executeKernelApi(src, args); + ctx->executeKernelApi(compiled, args); return {}; } std::size_t qpu_id = cudaq::getCurrentQpuId(); - return platform.unifiedLaunchModule(src, args, qpu_id); + return platform.unifiedLaunchModule(compiled, args, qpu_id); } cudaq::KernelThunkResultType @@ -355,14 +358,18 @@ cudaq::streamlinedLaunchKernel(const char *kernelName, std::string kernName = kernelName; KernelArgs args{rawArgs}; SourceModule src{kernName}; + // TODO: we are bypassing the compiler to avoid a dependency on the compiler. + // This delays compilation until inside the QPU. + CompiledModule compiled{src}; auto ctx = cudaq::getExecutionContext(); if (ctx && ctx->executeKernelApi) { - ctx->executeKernelApi(src, args); + ctx->executeKernelApi(compiled, args); return {}; } auto &platform = *getQuantumPlatformInternal(); std::size_t qpu_id = cudaq::getCurrentQpuId(); - [[maybe_unused]] auto r = platform.unifiedLaunchModule(src, args, qpu_id); + [[maybe_unused]] auto r = + platform.unifiedLaunchModule(compiled, args, qpu_id); // NB: The streamlined launch will never return results. Use alt or hybrid if // the kernel returns results. return {}; @@ -394,6 +401,9 @@ cudaq::hybridLaunchKernel(const char *kernelName, cudaq::KernelThunkType kernel, const std::string kernName = kernelName; std::size_t qpu_id = cudaq::getCurrentQpuId(); SourceModule src{kernName, kernel}; + // TODO: we are bypassing the compiler to avoid a dependency on the compiler. + // This delays compilation until inside the QPU. + CompiledModule compiled{src}; KernelArgs kargs = platform.is_remote(qpu_id) ? KernelArgs{rawArgs} @@ -401,14 +411,15 @@ cudaq::hybridLaunchKernel(const char *kernelName, cudaq::KernelThunkType kernel, auto ctx = cudaq::getExecutionContext(); if (ctx && ctx->executeKernelApi) { - ctx->executeKernelApi(src, kargs); + ctx->executeKernelApi(compiled, kargs); return {}; } if (platform.is_remote(qpu_id)) { // This path should never call a kernel that returns results. - [[maybe_unused]] auto r = platform.unifiedLaunchModule(src, kargs, qpu_id); + [[maybe_unused]] auto r = + platform.unifiedLaunchModule(compiled, kargs, qpu_id); return {}; } - return platform.unifiedLaunchModule(src, kargs, qpu_id); + return platform.unifiedLaunchModule(compiled, kargs, qpu_id); } diff --git a/runtime/cudaq/platform/quantum_platform.h b/runtime/cudaq/platform/quantum_platform.h index 46811abccce..512d9c706da 100644 --- a/runtime/cudaq/platform/quantum_platform.h +++ b/runtime/cudaq/platform/quantum_platform.h @@ -204,7 +204,7 @@ class quantum_platform { const std::size_t shots, std::size_t qpu_id = 0); [[nodiscard]] KernelThunkResultType - unifiedLaunchModule(const AnyModule &module, KernelArgs args, + unifiedLaunchModule(const CompiledModule &module, KernelArgs args, std::size_t qpu_id = 0); template diff --git a/runtime/internal/compiler/CMakeLists.txt b/runtime/internal/compiler/CMakeLists.txt index 2787ddbf279..bb7c47b5819 100644 --- a/runtime/internal/compiler/CMakeLists.txt +++ b/runtime/internal/compiler/CMakeLists.txt @@ -8,14 +8,6 @@ include(HandleLLVMOptions) -# Header-only target: public headers under include/cudaq_internal/compiler/. -# Link this when you need declarations only (no cudaq-mlir-runtime .so). -add_library(cudaq-mlir-runtime-headers INTERFACE) -target_include_directories(cudaq-mlir-runtime-headers INTERFACE - $ - $ -) - # MLIR/LLVM runtime support shared with cudaq-builder and remote platforms. # Single library so LLVM static Options are initialized at most once. @@ -38,9 +30,13 @@ set_source_files_properties( ) set_property(GLOBAL APPEND PROPERTY CUDAQ_RUNTIME_LIBS cudaq-mlir-runtime) -target_link_libraries(cudaq-mlir-runtime +target_include_directories(cudaq-mlir-runtime PUBLIC - cudaq-mlir-runtime-headers + $ + $ +) + +target_link_libraries(cudaq-mlir-runtime PRIVATE cudaq-common $<$>:cudaq-qir-verifier> @@ -84,9 +80,6 @@ if(APPLE) "LINKER:-flat_namespace") endif() -install(TARGETS cudaq-mlir-runtime-headers - EXPORT cudaq-mlir-runtime-targets) - install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION include) diff --git a/runtime/internal/compiler/CompiledModuleHelper.cpp b/runtime/internal/compiler/CompiledModuleHelper.cpp index 0218d50f843..7b0cadd04bc 100644 --- a/runtime/internal/compiler/CompiledModuleHelper.cpp +++ b/runtime/internal/compiler/CompiledModuleHelper.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "cudaq_internal/compiler/CompiledModuleHelper.h" +#include "cudaq_internal/compiler/Compiler.h" #include "cudaq_internal/compiler/LayoutInfo.h" #include "cudaq/Optimizer/Builder/RuntimeNames.h" #include "mlir/IR/BuiltinOps.h" @@ -95,4 +96,17 @@ CompiledModule CompiledModuleHelper::createCompiledModule( return compiled; } +void CompiledModuleHelper::ensureMlirArtifactsExist( + cudaq::CompiledModule &module, Compiler &compiler, cudaq::KernelArgs args) { + if (!module.getMlirArtifacts().empty()) + return; + + auto [moduleOp, context] = + cudaq_internal::compiler::Compiler::loadQuakeCodeByName(module.getName()); + + auto compiled = compiler.runPassPipeline(module.getName(), moduleOp, args, + true, std::move(context)); + for (const auto &[name, artifact] : compiled.getArtifacts()) + module.addArtifact(name, std::move(artifact)); +} } // namespace cudaq_internal::compiler diff --git a/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h b/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h index e04383d1214..1e5897c4c13 100644 --- a/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h +++ b/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h @@ -8,6 +8,7 @@ #pragma once #include "common/CompiledModule.h" +#include "common/KernelArgs.h" #include namespace mlir { @@ -18,6 +19,8 @@ class ModuleOp; namespace cudaq_internal::compiler { +class Compiler; + /// Compiler-side helper for `cudaq::CompiledModule`: static factory methods and /// utilities that depend on MLIR but pair with the MLIR-free `CompiledModule` /// API in `common/CompiledModule.h`. @@ -76,6 +79,12 @@ class CompiledModuleHelper { std::string name, cudaq::ResultInfo resultInfo, std::vector compiledArtifacts, cudaq::CompiledModule::CompilationMetadata metadata = {}); + + /// Ensure that the module has an MLIR artifact. If it doesn't, load it using + /// `loadQuakeCodeByName` and compile it. + static void ensureMlirArtifactsExist(cudaq::CompiledModule &module, + Compiler &compiler, + cudaq::KernelArgs args); }; } // namespace cudaq_internal::compiler diff --git a/unittests/common/ExecutionContextThreadTester.cpp b/unittests/common/ExecutionContextThreadTester.cpp index 8c853759576..1c2383be158 100644 --- a/unittests/common/ExecutionContextThreadTester.cpp +++ b/unittests/common/ExecutionContextThreadTester.cpp @@ -25,7 +25,7 @@ class DummyQPU : public cudaq::QPU { void enqueue(cudaq::QuantumTask &task) override {} cudaq::KernelThunkResultType - unifiedLaunchModule(const cudaq::AnyModule &module, + unifiedLaunchModule(const cudaq::CompiledModule &module, cudaq::KernelArgs args) override { return {}; } From b668ceb235e21c8baf93e8d823a4bb2f1ee0ad86 Mon Sep 17 00:00:00 2001 From: Luca Mondada Date: Tue, 9 Jun 2026 16:29:56 +0200 Subject: [PATCH 2/2] Policy launches call compileModule in launch.h Signed-off-by: Luca Mondada --- cudaq/include/cudaq/Target/CompileTarget.h | 11 +++ cudaq/tools/nvqpp/nvq++.in | 2 + runtime/common/AnalogRemoteRESTQPU.h | 9 ++- runtime/common/BaseRemoteRESTQPU.h | 2 +- runtime/common/CompiledModule.h | 4 ++ runtime/common/ExecutionContext.h | 6 +- runtime/cudaq/algorithms/launch.h | 40 ++++++++++- runtime/cudaq/platform.h | 14 ++-- runtime/cudaq/platform/default/DefaultQPU.cpp | 29 ++++---- runtime/cudaq/platform/default/DefaultQPU.h | 4 +- runtime/cudaq/platform/default/python/QPU.cpp | 8 +-- .../platform/default/rest/RemoteRESTQPU.cpp | 72 +++++++++++-------- .../platform/default/rest/RemoteRESTQPU.h | 5 +- .../cudaq/platform/fermioniq/FermioniqQPU.cpp | 30 ++++---- .../cudaq/platform/fermioniq/FermioniqQPU.h | 2 +- .../platform/mqpu/custatevec/GPUEmulatedQPU.h | 2 +- .../cudaq/platform/orca/OrcaRemoteRESTQPU.h | 10 ++- runtime/cudaq/platform/qpu.cpp | 11 +-- runtime/cudaq/platform/qpu.h | 4 +- runtime/cudaq/platform/quantum_platform.cpp | 46 +++++++----- .../compiler/CompiledModuleHelper.cpp | 14 ++-- runtime/internal/compiler/Compiler.cpp | 23 ++++++ .../compiler/CompiledModuleHelper.h | 8 +-- .../cudaq_internal/compiler/Compiler.h | 5 ++ .../TargetConfig/check_disable_mlir_links.cpp | 5 ++ unittests/CMakeLists.txt | 4 ++ .../common/ExecutionContextThreadTester.cpp | 2 +- 27 files changed, 248 insertions(+), 124 deletions(-) diff --git a/cudaq/include/cudaq/Target/CompileTarget.h b/cudaq/include/cudaq/Target/CompileTarget.h index ffd1371f394..cb741f469ed 100644 --- a/cudaq/include/cudaq/Target/CompileTarget.h +++ b/cudaq/include/cudaq/Target/CompileTarget.h @@ -21,6 +21,17 @@ class CompileTarget { /// Hook to update the pass pipeline before compilation. virtual void updatePassPipeline(std::string &passPipeline) const {} + /// Whether to recompile the kernel in the presence of an AOT-compiled module. + /// + /// If this is `false` and an AOT-compiled kernel (in the form of a function + /// pointer) is provided, then compilation will be skipped and all other + /// options in this class will be ignored. + /// + /// If this is `true`, the AOT-compiled module (if it exists) will be + /// discarded and compilation will start from scratch, according to the + /// options in this class. + bool overrideAOTCompilation = false; + /// Resolved MLIR pass-pipeline and `codegen` settings. struct PipelineConfig { /// If set, override compilation pipeline with this string. diff --git a/cudaq/tools/nvqpp/nvq++.in b/cudaq/tools/nvqpp/nvq++.in index c7d7bd05d77..7bbbfac9fd7 100644 --- a/cudaq/tools/nvqpp/nvq++.in +++ b/cudaq/tools/nvqpp/nvq++.in @@ -724,6 +724,8 @@ NVQIR_LIBS="${NVQIR_LIBS}${NVQIR_SIMULATION_BACKEND}" # Add the MLIR-related libraries if requested. if ${ENABLE_MLIR_LIB_LINKING}; then LINKLIBS="${LINKLIBS} -lcudaq-mlir-runtime -lcudaq-builder" +else + PREPROCESSOR_DEFINES="${PREPROCESSOR_DEFINES} -D CUDAQ_DISABLE_JIT_COMPILER" fi if ${ENABLE_REALTIME_LOWERING}; then diff --git a/runtime/common/AnalogRemoteRESTQPU.h b/runtime/common/AnalogRemoteRESTQPU.h index 1be6006945a..ddf0becc303 100644 --- a/runtime/common/AnalogRemoteRESTQPU.h +++ b/runtime/common/AnalogRemoteRESTQPU.h @@ -26,9 +26,14 @@ class AnalogRemoteRESTQPU : public BaseRemoteRESTQPU { /// @brief Launch a kernel with the given arguments /// Only analog Hamiltonian kernels are supported - KernelThunkResultType unifiedLaunchModule(const CompiledModule &module, + KernelThunkResultType unifiedLaunchModule(const AnyModule &module, KernelArgs args) override { - const auto &kernelName = module.getName(); + if (!std::holds_alternative(module)) + throw std::runtime_error( + "AnalogRemoteRESTQPU does not support pre-compiled module launch."); + + const auto &src = std::get(module); + const auto &kernelName = src.getName(); auto executionContext = cudaq::getExecutionContext(); if (!cudaq::detail::isAnalogHamiltonianKernel(kernelName)) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 7dda81d649a..f66c01e069a 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -470,7 +470,7 @@ class BaseRemoteRESTQPU : public QPU { } async_observe_result - completeLaunchKernel(async_observe_policy &policy, + completeLaunchKernel(const async_observe_policy &policy, const std::string &kernelName, std::vector &&codes) { std::size_t localShots = 1000; diff --git a/runtime/common/CompiledModule.h b/runtime/common/CompiledModule.h index af366d4d78e..d5ec2966700 100644 --- a/runtime/common/CompiledModule.h +++ b/runtime/common/CompiledModule.h @@ -268,6 +268,8 @@ class FatQuakeModule { /// Contains either a `nvq++`-compiled function pointer or an MLIR module, /// depending on the provenance of the kernel. class SourceModule : public FatQuakeModule { + friend class cudaq_internal::compiler::CompiledModuleHelper; + public: SourceModule(std::string kernelName) : FatQuakeModule(std::move(kernelName)) {} @@ -315,4 +317,6 @@ class CompiledModule : public FatQuakeModule { explicit CompiledModule(SourceModule src) : FatQuakeModule(std::move(src)) {} }; +using AnyModule = std::variant; + } // namespace cudaq diff --git a/runtime/common/ExecutionContext.h b/runtime/common/ExecutionContext.h index baf5710ebbd..9535ff0cc14 100644 --- a/runtime/common/ExecutionContext.h +++ b/runtime/common/ExecutionContext.h @@ -157,7 +157,7 @@ class ExecutionContext { std::optional cachedCompiledModule = std::nullopt; /// @brief Dispatcher towards the policy specific launch. - std::function + std::function executeKernelApi; /// @brief Slot for the detector error model, as `.dem` text. @@ -205,8 +205,8 @@ void resetExecutionContext(); /// @brief Execute the given function within the given policy and execution /// context. template -auto with_policy_and_ctx(Policy &policy, ExecutionContext &ctx, Callable &&f, - Args &&...args) +auto with_policy_and_ctx(const Policy &policy, ExecutionContext &ctx, + Callable &&f, Args &&...args) -> std::invoke_result_t { // Save the outer execution context (if any) so we can restore it after. diff --git a/runtime/cudaq/algorithms/launch.h b/runtime/cudaq/algorithms/launch.h index 31ce59ec637..118adc5bc3c 100644 --- a/runtime/cudaq/algorithms/launch.h +++ b/runtime/cudaq/algorithms/launch.h @@ -18,13 +18,34 @@ #include "cudaq/runtime/logger/logger.h" #include "cudaq/utils/cudaq_utils.h" #include +#include + +namespace cudaq_internal::compiler { +template +cudaq::CompiledModule +compileModule(const Policy &policy, + std::unique_ptr target, + const cudaq::SourceModule &src, cudaq::KernelArgs args, + bool isEntryPoint = true); +} // namespace cudaq_internal::compiler + +// If JIT compilation is disabled, make compilation a no-op. QPUs may throw an +// error if they expect a JIT-compiled module. +#ifdef CUDAQ_DISABLE_JIT_COMPILER +template +cudaq::CompiledModule cudaq_internal::compiler::compileModule( + const Policy &policy, std::unique_ptr target, + const cudaq::SourceModule &src, cudaq::KernelArgs args, bool isEntryPoint) { + return cudaq::CompiledModule{src}; +} +#endif namespace cudaq { namespace detail { /// @brief Execute the given function within the given execution context. template -auto launch(Policy &policy, std::size_t qpu_id, ExecutionContext &ctx, +auto launch(const Policy &policy, std::size_t qpu_id, ExecutionContext &ctx, quantum_platform &platform, Callable &&f, Args &&...args) -> Policy::result_type { @@ -50,9 +71,22 @@ auto launch(Policy &policy, std::size_t qpu_id, ExecutionContext &ctx, typename Policy::result_type result; auto &qpu = platform.getQPU(qpu_id); - ctx.executeKernelApi = [&qpu, &result, &policy](const CompiledModule &module, + ctx.executeKernelApi = [&qpu, &result, &policy](const AnyModule &module, const KernelArgs &args) { - result = qpu.launchKernel(policy, module, args); + CompiledModule compiled; + if (const auto *source = std::get_if(&module)) { + std::unique_ptr target; + if constexpr (requires { policy.inner; }) { + target = cudaq::get_compile_target(policy.inner); + } else { + target = cudaq::get_compile_target(policy); + } + compiled = cudaq_internal::compiler::compileModule( + policy, std::move(target), *source, args); + } else { + compiled = std::get(module); + } + result = qpu.launchKernel(policy, compiled, args); }; if constexpr (requires { policy.inner; }) diff --git a/runtime/cudaq/platform.h b/runtime/cudaq/platform.h index 70c69a3065a..cd0f903060a 100644 --- a/runtime/cudaq/platform.h +++ b/runtime/cudaq/platform.h @@ -47,14 +47,18 @@ std::unique_ptr get_compile_target(const Policy &policy) { return getQuantumPlatformInternal()->getCompileTarget(policy); } -/// Get the default compile target configuration used when JITing for Python. +/// Get the default compile target configuration +/// +/// This is suitable for local simulators, i.e. it will use +/// AOT-compiled modules as-is if they exist, and otherwise JIT-compile the +/// module as appropriate for a Python kernel. std::unique_ptr -getDefaultPythonCompileTarget(const sample_policy &policy); +getDefaultCompileTarget(const sample_policy &policy); std::unique_ptr -getDefaultPythonCompileTarget(const observe_policy &policy); +getDefaultCompileTarget(const observe_policy &policy); std::unique_ptr -getDefaultPythonCompileTarget(const other_policies &policy, - ExecutionContext *context); +getDefaultCompileTarget(const other_policies &policy, + ExecutionContext *context); // Declare this function, implemented elsewhere std::string getQIR(const std::string &); diff --git a/runtime/cudaq/platform/default/DefaultQPU.cpp b/runtime/cudaq/platform/default/DefaultQPU.cpp index 3cf1718ed2c..db732f37979 100644 --- a/runtime/cudaq/platform/default/DefaultQPU.cpp +++ b/runtime/cudaq/platform/default/DefaultQPU.cpp @@ -21,13 +21,20 @@ void cudaq::DefaultQPU::enqueue(QuantumTask &task) { } cudaq::KernelThunkResultType -cudaq::DefaultQPU::unifiedLaunchModule(const cudaq::CompiledModule &module, +cudaq::DefaultQPU::unifiedLaunchModule(const cudaq::AnyModule &module, cudaq::KernelArgs args) { ScopedTraceWithContext(cudaq::TIMING_LAUNCH, "QPU::unifiedLaunchModule"); - auto rawFn = module.getFunctionPtr(); - if (!rawFn) - return runJITCompiledModule(module, args); + std::optional rawFn; + if (std::holds_alternative(module)) { + rawFn = std::get(module).getFunctionPtr(); + assert(rawFn && "SourceModule must have a valid AOT-compiled thunk"); + } else { + auto &compiled = std::get(module); + rawFn = compiled.getFunctionPtr(); + if (!rawFn) + return runJITCompiledModule(compiled, args); + } auto packed = args.getPacked(); void *argData = packed ? packed->data.data() : nullptr; @@ -63,7 +70,7 @@ cudaq::DefaultQPU::launchKernel(const cudaq::observe_policy &policy, } cudaq::async_observe_result -cudaq::DefaultQPU::launchKernel(async_observe_policy &policy, +cudaq::DefaultQPU::launchKernel(const async_observe_policy &policy, const cudaq::CompiledModule &module, cudaq::KernelArgs args) { throw std::runtime_error( @@ -72,24 +79,18 @@ cudaq::DefaultQPU::launchKernel(async_observe_policy &policy, std::unique_ptr cudaq::DefaultQPU::getCompileTarget(const sample_policy &policy) { - // Currently this is only used for Python kernels, as C++ kernels skip JIT - // compilation and call the AOT-generated function directly. - return getDefaultPythonCompileTarget(policy); + return getDefaultCompileTarget(policy); } std::unique_ptr cudaq::DefaultQPU::getCompileTarget(const observe_policy &policy) { - // Currently this is only used for Python kernels, as C++ kernels skip JIT - // compilation and call the AOT-generated function directly. - return getDefaultPythonCompileTarget(policy); + return getDefaultCompileTarget(policy); } std::unique_ptr cudaq::DefaultQPU::getCompileTarget(const other_policies &policy, ExecutionContext *context) { - // Currently this is only used for Python kernels, as C++ kernels skip JIT - // compilation and call the AOT-generated function directly. - return getDefaultPythonCompileTarget(policy, context); + return getDefaultCompileTarget(policy, context); } void cudaq::DefaultQPU::configureExecutionContext( diff --git a/runtime/cudaq/platform/default/DefaultQPU.h b/runtime/cudaq/platform/default/DefaultQPU.h index 795445e5f8a..8dac5e23c1e 100644 --- a/runtime/cudaq/platform/default/DefaultQPU.h +++ b/runtime/cudaq/platform/default/DefaultQPU.h @@ -21,7 +21,7 @@ class DefaultQPU : public QPU { void enqueue(QuantumTask &task) override; - KernelThunkResultType unifiedLaunchModule(const cudaq::CompiledModule &module, + KernelThunkResultType unifiedLaunchModule(const cudaq::AnyModule &module, cudaq::KernelArgs args) override; sample_result launchKernel(const sample_policy &policy, @@ -36,7 +36,7 @@ class DefaultQPU : public QPU { const CompiledModule &module, KernelArgs args) override; - async_observe_result launchKernel(async_observe_policy &policy, + async_observe_result launchKernel(const async_observe_policy &policy, const CompiledModule &module, KernelArgs args) override; diff --git a/runtime/cudaq/platform/default/python/QPU.cpp b/runtime/cudaq/platform/default/python/QPU.cpp index 32c8444771c..673f1666182 100644 --- a/runtime/cudaq/platform/default/python/QPU.cpp +++ b/runtime/cudaq/platform/default/python/QPU.cpp @@ -43,8 +43,8 @@ std::string cudaq::detail::lower_to_qir_llvm(const std::string &name, const std::string &format) { ScopedTraceWithContext(cudaq::TIMING_JIT, "getQIR", name); - auto target = getDefaultPythonCompileTarget(other_policies{}, - cudaq::getExecutionContext()); + auto target = + getDefaultCompileTarget(other_policies{}, cudaq::getExecutionContext()); target->fullySpecialize = true; cudaq_internal::compiler::Compiler compiler(std::move(target)); @@ -87,8 +87,8 @@ std::string cudaq::detail::lower_to_openqasm(const std::string &name, OpaqueArguments &args) { ScopedTraceWithContext(cudaq::TIMING_JIT, "getASM", name); - auto target = getDefaultPythonCompileTarget(other_policies{}, - cudaq::getExecutionContext()); + auto target = + getDefaultCompileTarget(other_policies{}, cudaq::getExecutionContext()); target->fullySpecialize = true; cudaq_internal::compiler::Compiler compiler(std::move(target)); diff --git a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp index 6f8f9af5fb4..7f14610359f 100644 --- a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp +++ b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp @@ -7,30 +7,23 @@ ******************************************************************************/ #include "RemoteRESTQPU.h" +#include "common/CompiledModule.h" #include "cudaq_internal/compiler/Compiler.h" using namespace cudaq; cudaq::RemoteRESTQPU::~RemoteRESTQPU() = default; -static std::vector -runCodegen(cudaq_internal::compiler::Compiler &compiler, - const CompiledModule &module, KernelArgs args) { - // TODO: This should be moved into compiler::compileModule, but this would add - // a dependency on the compiler in the C++ launch path. - auto compiled = module; - cudaq_internal::compiler::CompiledModuleHelper::ensureMlirArtifactsExist( - compiled, compiler, args); - - return compiler.emitKernelExecutions(compiled); -} - sample_result RemoteRESTQPU::launchKernel(const sample_policy &policy, const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel {}", policy.name); + if (module.getMlirArtifacts().empty()) + throw std::runtime_error("QPU does not support launching a " + "CompiledModule without MLIR artifacts."); + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); - auto codes = runCodegen(compiler, module, args); + auto codes = compiler.emitKernelExecutions(module); if (compiler.hasWarnedNamedMeasurements()) policy.warnedNamedMeasurements = true; @@ -42,8 +35,12 @@ RemoteRESTQPU::launchKernel(const async_sample_policy &policy, const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel async {}", policy.inner.name); + if (module.getMlirArtifacts().empty()) + throw std::runtime_error("QPU does not support launching a " + "CompiledModule without MLIR artifacts."); + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); - auto codes = runCodegen(compiler, module, args); + auto codes = compiler.emitKernelExecutions(module); if (compiler.hasWarnedNamedMeasurements()) policy.inner.warnedNamedMeasurements = true; @@ -55,31 +52,50 @@ observe_result RemoteRESTQPU::launchKernel(const observe_policy &policy, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel {}", policy.name); + if (module.getMlirArtifacts().empty()) + throw std::runtime_error("QPU does not support launching a " + "CompiledModule without MLIR artifacts."); + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); - auto codes = runCodegen(compiler, module, args); + auto codes = compiler.emitKernelExecutions(module); return completeLaunchKernel(policy, module.getName(), std::move(codes)); } -async_observe_result RemoteRESTQPU::launchKernel(async_observe_policy &policy, - const CompiledModule &module, - KernelArgs args) { +async_observe_result +RemoteRESTQPU::launchKernel(const async_observe_policy &policy, + const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("RemoteRESTQPU::launchKernel async {}", policy.inner.name); + if (module.getMlirArtifacts().empty()) + throw std::runtime_error("QPU does not support launching a " + "CompiledModule without MLIR artifacts."); + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); - auto codes = runCodegen(compiler, module, args); + auto codes = compiler.emitKernelExecutions(module); return completeLaunchKernel(policy, module.getName(), std::move(codes)); } KernelThunkResultType -RemoteRESTQPU::unifiedLaunchModule(const CompiledModule &module, - KernelArgs args) { - CUDAQ_INFO("launching remote rest kernel ({})", module.getName()); - - cudaq_internal::compiler::Compiler compiler( - getCompileTarget(other_policies{}, getExecutionContext())); - auto codes = runCodegen(compiler, module, args); - - completeLaunchKernel(module.getName(), std::move(codes)); +RemoteRESTQPU::unifiedLaunchModule(const AnyModule &module, KernelArgs args) { + CompiledModule compiled; + auto target = getCompileTarget(other_policies{}, getExecutionContext()); + cudaq_internal::compiler::Compiler compiler(std::move(target)); + + if (std::holds_alternative(module)) { + auto source = std::get(module); + CUDAQ_INFO("no compiled kernel found for {}, compiling now", + source.getName()); + cudaq_internal::compiler::CompiledModuleHelper::loadMlirArtifacts(source); + compiled = compiler.runPassPipeline( + source.getName(), source.getMlirOpaqueModulePtr(), args, true); + } else { + compiled = std::get(module); + } + CUDAQ_INFO("launching remote rest kernel ({})", compiled.getName()); + + auto codes = compiler.emitKernelExecutions(compiled); + + completeLaunchKernel(compiled.getName(), std::move(codes)); return {}; } diff --git a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h index 95c7ea84da4..fc319893266 100644 --- a/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h +++ b/runtime/cudaq/platform/default/rest/RemoteRESTQPU.h @@ -9,6 +9,7 @@ #pragma once #include "common/BaseRemoteRESTQPU.h" +#include "common/CompiledModule.h" namespace cudaq { @@ -25,7 +26,7 @@ class RemoteRESTQPU : public BaseRemoteRESTQPU { /// representation required by the targeted backend. Handle all pertinent /// modifications for the execution context as well as asynchronous or /// synchronous invocation. - KernelThunkResultType unifiedLaunchModule(const CompiledModule &module, + KernelThunkResultType unifiedLaunchModule(const AnyModule &module, KernelArgs args) override; sample_result launchKernel(const sample_policy &policy, @@ -40,7 +41,7 @@ class RemoteRESTQPU : public BaseRemoteRESTQPU { const CompiledModule &module, KernelArgs args) override; - async_observe_result launchKernel(async_observe_policy &policy, + async_observe_result launchKernel(const async_observe_policy &policy, const CompiledModule &module, KernelArgs args) override; }; diff --git a/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp b/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp index b443c5d05df..8d50ff25a75 100644 --- a/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp +++ b/runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp @@ -40,45 +40,41 @@ cudaq::FermioniqQPU::launchKernel(const cudaq::observe_policy &policy, CUDAQ_INFO("FermioniqBaseQPU launching kernel ({}) with policy {}", module.getName(), policy.name); - // TODO: This should be moved into compiler::compileModule, but this would add - // a dependency on the compiler in the C++ launch path. - auto compiled = module; - cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); - cudaq_internal::compiler::CompiledModuleHelper::ensureMlirArtifactsExist( - compiled, compiler, args); + if (module.getMlirArtifacts().empty()) + throw std::runtime_error("QPU does not support launching a " + "CompiledModule without MLIR artifacts."); - auto codes = compiler.emitKernelExecutions(compiled); + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy)); + auto codes = compiler.emitKernelExecutions(module); if (codes.size() != 1) throw std::runtime_error("Provider only allows 1 circuit at a time."); attachFermioniqObservable(codes[0], policy.spin); auto result = - completeLaunchKernel(policy, compiled.getName(), std::move(codes)); + completeLaunchKernel(policy, module.getName(), std::move(codes)); auto expectation = result.raw_data().expectation(GlobalRegisterName); return cudaq::observe_result(expectation, result.get_spin(), result.raw_data()); } cudaq::async_observe_result -cudaq::FermioniqQPU::launchKernel(cudaq::async_observe_policy &policy, +cudaq::FermioniqQPU::launchKernel(const cudaq::async_observe_policy &policy, const CompiledModule &module, KernelArgs args) { CUDAQ_INFO("FermioniqBaseQPU launching kernel ({}) with policy {}", module.getName(), policy.inner.name); - // TODO: This should be moved into compiler::compileModule, but this would add - // a dependency on the compiler in the C++ launch path. - auto compiled = module; - cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); - cudaq_internal::compiler::CompiledModuleHelper::ensureMlirArtifactsExist( - compiled, compiler, args); + if (module.getMlirArtifacts().empty()) + throw std::runtime_error("QPU does not support launching a " + "CompiledModule without MLIR artifacts."); - auto codes = compiler.emitKernelExecutions(compiled); + cudaq_internal::compiler::Compiler compiler(getCompileTarget(policy.inner)); + auto codes = compiler.emitKernelExecutions(module); if (codes.size() != 1) throw std::runtime_error("Provider only allows 1 circuit at a time."); attachFermioniqObservable(codes[0], policy.inner.spin); - return completeLaunchKernel(policy, compiled.getName(), std::move(codes)); + return completeLaunchKernel(policy, module.getName(), std::move(codes)); } CUDAQ_REGISTER_TYPE(cudaq::QPU, cudaq::FermioniqQPU, fermioniq) diff --git a/runtime/cudaq/platform/fermioniq/FermioniqQPU.h b/runtime/cudaq/platform/fermioniq/FermioniqQPU.h index 55762991585..0902f30f8bf 100644 --- a/runtime/cudaq/platform/fermioniq/FermioniqQPU.h +++ b/runtime/cudaq/platform/fermioniq/FermioniqQPU.h @@ -48,7 +48,7 @@ class FermioniqQPU : public BaseRemoteRESTQPU { const CompiledModule &module, KernelArgs args) override; - async_observe_result launchKernel(async_observe_policy &policy, + async_observe_result launchKernel(const async_observe_policy &policy, const CompiledModule &module, KernelArgs args) override; }; diff --git a/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h b/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h index eb8cbaa2d49..d078db3f975 100644 --- a/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h +++ b/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.h @@ -22,7 +22,7 @@ class GPUEmulatedQPU : public QPU { void enqueue(QuantumTask &task) override; - KernelThunkResultType unifiedLaunchModule(const cudaq::CompiledModule &src, + KernelThunkResultType unifiedLaunchModule(const cudaq::AnyModule &module, cudaq::KernelArgs args) override; void configureExecutionContext(ExecutionContext &context) const override; diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h index de5953ee534..acca8013262 100644 --- a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h +++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h @@ -9,6 +9,7 @@ #pragma once #include "OrcaExecutor.h" +#include "common/CompiledModule.h" #include "cudaq/platform/qpu.h" #include "cudaq/utils/cudaq_utils.h" #include "cudaq/utils/owning_ptr.h" @@ -96,10 +97,15 @@ class OrcaRemoteRESTQPU : public cudaq::QPU { /// @brief Launch the kernel. Handle all pertinent modifications for the /// execution context. [[nodiscard]] KernelThunkResultType - unifiedLaunchModule(const CompiledModule &module, KernelArgs args) override { + unifiedLaunchModule(const AnyModule &module, KernelArgs args) override { + if (!std::holds_alternative(module)) + throw std::runtime_error( + "OrcaRemoteRESTQPU does not support pre-compiled module launch."); + + const auto &src = std::get(module); auto packed = args.getPacked(); void *argData = packed ? packed->data.data() : nullptr; - return launchKernelCommon(module.getName(), argData); + return launchKernelCommon(src.getName(), argData); } }; } // namespace cudaq diff --git a/runtime/cudaq/platform/qpu.cpp b/runtime/cudaq/platform/qpu.cpp index 3ecd30ca8dc..8f89e9b999d 100644 --- a/runtime/cudaq/platform/qpu.cpp +++ b/runtime/cudaq/platform/qpu.cpp @@ -25,8 +25,9 @@ using namespace cudaq_internal::compiler; using namespace cudaq; cudaq::KernelThunkResultType -cudaq::QPU::unifiedLaunchModule(const CompiledModule &module, KernelArgs args) { - return runJITCompiledModule(module, args); +cudaq::QPU::unifiedLaunchModule(const AnyModule &module, KernelArgs args) { + throw std::runtime_error( + "This QPU does not support launching the other_policies."); } sample_result cudaq::QPU::launchKernel(const sample_policy &policy, @@ -50,9 +51,9 @@ observe_result cudaq::QPU::launchKernel(const observe_policy &policy, "This QPU does not support launching the observe_policy."); } -async_observe_result cudaq::QPU::launchKernel(async_observe_policy &policy, - const CompiledModule &module, - KernelArgs args) { +async_observe_result +cudaq::QPU::launchKernel(const async_observe_policy &policy, + const CompiledModule &module, KernelArgs args) { throw std::runtime_error( "This QPU does not support launching the async_observe_policy."); } diff --git a/runtime/cudaq/platform/qpu.h b/runtime/cudaq/platform/qpu.h index a8a9fcb67d1..ff43d384147 100644 --- a/runtime/cudaq/platform/qpu.h +++ b/runtime/cudaq/platform/qpu.h @@ -150,12 +150,12 @@ class QPU : public registry::RegisteredType { const CompiledModule &module, KernelArgs args); - virtual async_observe_result launchKernel(async_observe_policy &policy, + virtual async_observe_result launchKernel(const async_observe_policy &policy, const CompiledModule &module, KernelArgs args); [[nodiscard]] virtual KernelThunkResultType - unifiedLaunchModule(const CompiledModule &module, KernelArgs args); + unifiedLaunchModule(const AnyModule &module, KernelArgs args); /// Get the compile target of the QPU for the given policy. [[nodiscard]] virtual std::unique_ptr diff --git a/runtime/cudaq/platform/quantum_platform.cpp b/runtime/cudaq/platform/quantum_platform.cpp index a0f85a96261..b8a2286a9e6 100644 --- a/runtime/cudaq/platform/quantum_platform.cpp +++ b/runtime/cudaq/platform/quantum_platform.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "cudaq/platform/quantum_platform.h" +#include "algorithms/policies.h" #include "common/CompiledModule.h" #include "common/Environment.h" #include "common/ExecutionContext.h" @@ -16,7 +17,7 @@ #include "cudaq/algorithms/policy_dispatch.h" #include "cudaq/platform/qpu.h" #include "cudaq/runtime/logger/logger.h" -#include "mlir/IR/BuiltinOps.h" +// #include "mlir/IR/BuiltinOps.h" #include using namespace cudaq_internal::compiler; @@ -122,17 +123,21 @@ getDefaultPythonCompileTargetImpl() { } std::unique_ptr -getDefaultPythonCompileTarget(const sample_policy &) { - return getDefaultPythonCompileTargetImpl(); +getDefaultCompileTarget(const sample_policy &) { + auto ct = getDefaultPythonCompileTargetImpl(); + ct->overrideAOTCompilation = false; + return ct; } std::unique_ptr -getDefaultPythonCompileTarget(const observe_policy &) { - return getDefaultPythonCompileTargetImpl(); +getDefaultCompileTarget(const observe_policy &) { + auto ct = getDefaultPythonCompileTargetImpl(); + ct->overrideAOTCompilation = false; + return ct; } std::unique_ptr -getDefaultPythonCompileTarget(const other_policies &, - ExecutionContext *context) { +getDefaultCompileTarget(const other_policies &, ExecutionContext *context) { auto ct = getDefaultPythonCompileTargetImpl(); + ct->overrideAOTCompilation = false; if (context && context->name == "dem") { ct->emitJit = true; @@ -340,12 +345,16 @@ cudaq::altLaunchKernel(const char *kernelName, SourceModule src{kernName, kernelFunc}; // TODO: we are bypassing the compiler to avoid a dependency on the compiler. // This delays compilation until inside the QPU. - CompiledModule compiled{src}; + // CompiledModule compiled{src}; auto ctx = cudaq::getExecutionContext(); if (ctx && ctx->executeKernelApi) { - ctx->executeKernelApi(compiled, args); + ctx->executeKernelApi(src, args); return {}; } + + // TODO: we are bypassing the compiler to avoid adding a dependency on the + // compiler here. This delays compilation until inside the QPU. + CompiledModule compiled{src}; std::size_t qpu_id = cudaq::getCurrentQpuId(); return platform.unifiedLaunchModule(compiled, args, qpu_id); } @@ -358,14 +367,15 @@ cudaq::streamlinedLaunchKernel(const char *kernelName, std::string kernName = kernelName; KernelArgs args{rawArgs}; SourceModule src{kernName}; - // TODO: we are bypassing the compiler to avoid a dependency on the compiler. - // This delays compilation until inside the QPU. - CompiledModule compiled{src}; auto ctx = cudaq::getExecutionContext(); if (ctx && ctx->executeKernelApi) { - ctx->executeKernelApi(compiled, args); + ctx->executeKernelApi(src, args); return {}; } + + // TODO: we are bypassing the compiler to avoid adding a dependency on the + // compiler here. This delays compilation until inside the QPU. + CompiledModule compiled{src}; auto &platform = *getQuantumPlatformInternal(); std::size_t qpu_id = cudaq::getCurrentQpuId(); [[maybe_unused]] auto r = @@ -386,6 +396,7 @@ cudaq::streamlinedLaunchModule(const CompiledModule &compiled, ctx->executeKernelApi(compiled, {rawArgs}); return {}; } + auto &platform = *getQuantumPlatformInternal(); std::size_t qpu_id = getCurrentQpuId(); return platform.unifiedLaunchModule(compiled, {rawArgs}, qpu_id); @@ -401,9 +412,6 @@ cudaq::hybridLaunchKernel(const char *kernelName, cudaq::KernelThunkType kernel, const std::string kernName = kernelName; std::size_t qpu_id = cudaq::getCurrentQpuId(); SourceModule src{kernName, kernel}; - // TODO: we are bypassing the compiler to avoid a dependency on the compiler. - // This delays compilation until inside the QPU. - CompiledModule compiled{src}; KernelArgs kargs = platform.is_remote(qpu_id) ? KernelArgs{rawArgs} @@ -411,10 +419,14 @@ cudaq::hybridLaunchKernel(const char *kernelName, cudaq::KernelThunkType kernel, auto ctx = cudaq::getExecutionContext(); if (ctx && ctx->executeKernelApi) { - ctx->executeKernelApi(compiled, kargs); + ctx->executeKernelApi(src, kargs); return {}; } + // TODO: we are bypassing the compiler to avoid adding a dependency on the + // compiler here. This delays compilation until inside the QPU. + CompiledModule compiled{src}; + if (platform.is_remote(qpu_id)) { // This path should never call a kernel that returns results. [[maybe_unused]] auto r = diff --git a/runtime/internal/compiler/CompiledModuleHelper.cpp b/runtime/internal/compiler/CompiledModuleHelper.cpp index 7b0cadd04bc..0bd2609b9dc 100644 --- a/runtime/internal/compiler/CompiledModuleHelper.cpp +++ b/runtime/internal/compiler/CompiledModuleHelper.cpp @@ -7,12 +7,12 @@ ******************************************************************************/ #include "cudaq_internal/compiler/CompiledModuleHelper.h" +#include "common/CompiledModule.h" #include "cudaq_internal/compiler/Compiler.h" #include "cudaq_internal/compiler/LayoutInfo.h" #include "cudaq/Optimizer/Builder/RuntimeNames.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/IR/Operation.h" #include "mlir/IR/Types.h" using namespace mlir; @@ -96,17 +96,15 @@ CompiledModule CompiledModuleHelper::createCompiledModule( return compiled; } -void CompiledModuleHelper::ensureMlirArtifactsExist( - cudaq::CompiledModule &module, Compiler &compiler, cudaq::KernelArgs args) { +void CompiledModuleHelper::loadMlirArtifacts(cudaq::SourceModule &module) { if (!module.getMlirArtifacts().empty()) return; auto [moduleOp, context] = cudaq_internal::compiler::Compiler::loadQuakeCodeByName(module.getName()); - - auto compiled = compiler.runPassPipeline(module.getName(), moduleOp, args, - true, std::move(context)); - for (const auto &[name, artifact] : compiled.getArtifacts()) - module.addArtifact(name, std::move(artifact)); + cudaq::FatQuakeModule::MlirArtifact mlirArtifact(moduleOp, + std::move(context)); + module.addArtifact(module.getName(), std::move(mlirArtifact)); } + } // namespace cudaq_internal::compiler diff --git a/runtime/internal/compiler/Compiler.cpp b/runtime/internal/compiler/Compiler.cpp index 3d1430d6a2e..2681efdfcd1 100644 --- a/runtime/internal/compiler/Compiler.cpp +++ b/runtime/internal/compiler/Compiler.cpp @@ -25,6 +25,8 @@ #include "cudaq/Optimizer/Transforms/AddMetadata.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Optimizer/Transforms/ResourceCount.h" +#include "cudaq/algorithms/observe/policy.h" +#include "cudaq/algorithms/sample/policy.h" #include "cudaq/runtime/logger/logger.h" #include "cudaq/utils/cudaq_utils.h" #include "llvm/ADT/SmallSet.h" @@ -703,3 +705,24 @@ mlir::ModuleOp cudaq_internal::compiler::Compiler::lowerQuakeCodeBuildModule( } return moduleOp; } + +// Explicit template instantiations, required to link the calls to the compiler +// from `cudaq::detail::launch` (see runtime/cudaq/algorithms/launch.h). +template cudaq::CompiledModule +cudaq_internal::compiler::compileModule( + const cudaq::sample_policy &, std::unique_ptr, + const cudaq::SourceModule &, cudaq::KernelArgs, bool); +template cudaq::CompiledModule +cudaq_internal::compiler::compileModule( + const cudaq::observe_policy &, std::unique_ptr, + const cudaq::SourceModule &, cudaq::KernelArgs, bool); +template cudaq::CompiledModule cudaq_internal::compiler::compileModule< + cudaq::async_policy_wrapper>( + const cudaq::async_policy_wrapper &, + std::unique_ptr, const cudaq::SourceModule &, + cudaq::KernelArgs, bool); +template cudaq::CompiledModule cudaq_internal::compiler::compileModule< + cudaq::async_policy_wrapper>( + const cudaq::async_policy_wrapper &, + std::unique_ptr, const cudaq::SourceModule &, + cudaq::KernelArgs, bool); diff --git a/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h b/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h index 1e5897c4c13..d11fabdb87b 100644 --- a/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h +++ b/runtime/internal/compiler/include/cudaq_internal/compiler/CompiledModuleHelper.h @@ -8,7 +8,6 @@ #pragma once #include "common/CompiledModule.h" -#include "common/KernelArgs.h" #include namespace mlir { @@ -80,11 +79,8 @@ class CompiledModuleHelper { std::vector compiledArtifacts, cudaq::CompiledModule::CompilationMetadata metadata = {}); - /// Ensure that the module has an MLIR artifact. If it doesn't, load it using - /// `loadQuakeCodeByName` and compile it. - static void ensureMlirArtifactsExist(cudaq::CompiledModule &module, - Compiler &compiler, - cudaq::KernelArgs args); + /// Load the module's source MLIR using `Compiler::loadQuakeCodeByName`. + static void loadMlirArtifacts(cudaq::SourceModule &module); }; } // namespace cudaq_internal::compiler diff --git a/runtime/internal/compiler/include/cudaq_internal/compiler/Compiler.h b/runtime/internal/compiler/include/cudaq_internal/compiler/Compiler.h index 3600a878f74..ebbf646877b 100644 --- a/runtime/internal/compiler/include/cudaq_internal/compiler/Compiler.h +++ b/runtime/internal/compiler/include/cudaq_internal/compiler/Compiler.h @@ -143,6 +143,11 @@ compileModule(const Policy &policy, std::unique_ptr target, const cudaq::SourceModule &src, cudaq::KernelArgs args, bool isEntryPoint = true) { + if (!target->overrideAOTCompilation && src.getFunctionPtr()) { + // We are allowed to use the AOT-compiled module as-is, so nothing to do. + return cudaq::CompiledModule{src}; + } + const auto &kernelName = src.getName(); auto modulePtr = src.getMlirOpaqueModulePtr(); assert(modulePtr && "Compiler::compileModule requires an MLIR artifact"); diff --git a/targettests/TargetConfig/check_disable_mlir_links.cpp b/targettests/TargetConfig/check_disable_mlir_links.cpp index ffffdb3db4d..066d75702ad 100644 --- a/targettests/TargetConfig/check_disable_mlir_links.cpp +++ b/targettests/TargetConfig/check_disable_mlir_links.cpp @@ -7,6 +7,9 @@ ******************************************************************************/ // RUN: nvq++ --disable-mlir-links %s -o %s.x && ! ldd %s.x | grep -q libcudaq-mlir-runtime.so +// RUN: nvq++ --disable-mlir-links %s -o %s.x && ./%s.x +// We expect a failure when emulating a target that requires JIT compilation. +// RUN: nvq++ --disable-mlir-links --target quantinuum --emulate %s -o %s.x && %s.x 2>&1 | FileCheck %s --check-prefix=FAIL #include "cudaq.h" @@ -21,3 +24,5 @@ int main() { counts.dump(); return 0; } + +// FAIL: QPU does not support launching a CompiledModule without MLIR artifacts diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index c9ca33e7d32..a9344cc1f49 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -23,6 +23,10 @@ SET(CMAKE_SHARED_LINKER_FLAGS "") # survives as an internal implementation detail. add_compile_definitions(CUDAQ_LIBRARY_MODE) +# Disable JIT compilation as it is not needed (and we would otherwise have to +# link in the compiler library for no reason) +add_compile_definitions(CUDAQ_DISABLE_JIT_COMPILER) + # ctest's PROCESSORS property tells the scheduler how many CPU slots each test # occupies. Without it, `ctest -j N` launches N OpenMP-parallel tests at once # (2N threads competing for N cores). This value is only applied to tests that diff --git a/unittests/common/ExecutionContextThreadTester.cpp b/unittests/common/ExecutionContextThreadTester.cpp index 1c2383be158..8c853759576 100644 --- a/unittests/common/ExecutionContextThreadTester.cpp +++ b/unittests/common/ExecutionContextThreadTester.cpp @@ -25,7 +25,7 @@ class DummyQPU : public cudaq::QPU { void enqueue(cudaq::QuantumTask &task) override {} cudaq::KernelThunkResultType - unifiedLaunchModule(const cudaq::CompiledModule &module, + unifiedLaunchModule(const cudaq::AnyModule &module, cudaq::KernelArgs args) override { return {}; }