Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cudaq/include/cudaq/Target/CompileTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ class CompileTarget {
/// Whether to fully specialize the kernel.
bool fullySpecialize = true;

/// Whether this target is a local simulator (neither remote nor emulated).
/// On this path, `i1` vector args are marshaled as bit-packed
/// `std::vector<bool>`.
bool isLocalSimulator = false;

/// Set the `changeSemantics` flag for the argument synthesis pass.
bool argumentSynthChangeSemantics = true;

Expand Down
37 changes: 37 additions & 0 deletions python/tests/builder/test_kernel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import random
import numpy as np
import os
import subprocess
import sys
from typing import List

import cudaq
Expand Down Expand Up @@ -1535,6 +1537,41 @@ def test_call_invalid_attribute_on_a_kernel():
assert "not supported on PyKernel" in str(e.value)


def test_repeated_builder_launch_no_segfault():
    """Regression test: a repeated ``make_kernel`` + ``sample`` loop must not crash.

    A ``list[bool]`` argument used to corrupt the heap during argument
    synthesis, which made a loop like the one below crash at a random
    iteration.

    The crash only shows up when the padding of the bit-packed
    ``std::vector<bool>`` is nonzero. ``MALLOC_PERTURB_`` has to be set before
    the process starts, so the loop is executed in a subprocess. The
    unit-level regression lives in
    ``runtime/test/test_argument_conversion.cpp``.
    """
    # Source of the child program; it prints 'OK' only if every iteration
    # of the loop completes.
    child_source = ("import cudaq\n"
                    "from typing import List\n"
                    "cudaq.set_target('qpp-cpu')\n"
                    "for _ in range(512):\n"
                    " kernel, *_ = cudaq.make_kernel(bool, list[bool], List[int], list[float])\n"
                    " kernel.qalloc(1)\n"
                    " cudaq.sample(kernel, False, [False], [3], [3.5])\n"
                    " cudaq.sample(kernel, False, [], [], [])\n"
                    "print('OK')\n")

    # Inherit the current environment and dirty fresh allocations: with
    # zeroed std::vector<bool> padding the corruption stays latent and the
    # test would pass even with the bug present.
    child_env = {**os.environ, "MALLOC_PERTURB_": "165"}

    result = subprocess.run([sys.executable, "-c", child_source],
                            env=child_env,
                            capture_output=True,
                            text=True,
                            timeout=900)

    exit_code = result.returncode
    # Only the tail of stderr is reported to keep the failure message short.
    assert exit_code == 0, ("repeated make_kernel/sample crashed "
                            f"(returncode={exit_code}).\n"
                            f"stdout:\n{result.stdout}\n"
                            f"stderr (tail):\n{result.stderr[-3000:]}")
    assert "OK" in result.stdout


# leave for gdb debugging
if __name__ == "__main__":
loc = os.path.abspath(__file__)
Expand Down
1 change: 1 addition & 0 deletions runtime/cudaq/platform/default/python/QPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ getCompileTarget(cudaq::ExecutionContext *context) {
!(cudaq::is_remote_platform() || cudaq::is_emulated_platform());

ct->fullySpecialize = !isLocalSimulator;
ct->isLocalSimulator = isLocalSimulator;
ct->supportDeviceCalls = true;
ct->emitResourceCounts = context && context->name == "resource-count";
ct->argumentSynthChangeSemantics = false;
Expand Down
91 changes: 61 additions & 30 deletions runtime/internal/compiler/ArgumentConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ static Value genConstant(OpBuilder &builder, const std::string &v,

// Forward declare the aggregate type builders, as they can be recursive.
static Value genRecursiveSpan(OpBuilder &, cudaq::cc::StdvecType, void *,
ModuleOp, llvm::DataLayout &);
ModuleOp, llvm::DataLayout &, bool);
static Value genConstant(OpBuilder &, cudaq::cc::StdvecType, void *, ModuleOp,
llvm::DataLayout &);
llvm::DataLayout &, bool);
static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, ModuleOp,
llvm::DataLayout &);
llvm::DataLayout &, bool);
static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp,
llvm::DataLayout &);
llvm::DataLayout &, bool);
static Value genConstant(OpBuilder &, cudaq::cc::CallableType, void *, ModuleOp,
llvm::DataLayout &);
llvm::DataLayout &, bool);

/// Create callee.init_N that initializes the state
///
Expand Down Expand Up @@ -524,7 +524,7 @@ static bool isSupportedRecursiveSpan(cudaq::cc::StdvecType ty) {

// Recursive step processing of aggregates.
Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod,
llvm::DataLayout &layout) {
llvm::DataLayout &layout, bool boolVecBitPacked = false) {
auto *ctx = builder.getContext();
return TypeSwitch<Type, Value>(ty)
.Case([&](IntegerType intTy) -> Value {
Expand Down Expand Up @@ -567,16 +567,16 @@ Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod,
substMod);
})
.Case([&](cudaq::cc::StdvecType ty) {
return genConstant(builder, ty, p, substMod, layout);
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
})
.Case([&](cudaq::cc::StructType ty) {
return genConstant(builder, ty, p, substMod, layout);
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
})
.Case([&](cudaq::cc::ArrayType ty) {
return genConstant(builder, ty, p, substMod, layout);
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
})
.Case([&](cudaq::cc::CallableType ty) {
return genConstant(builder, ty, p, substMod, layout);
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
})
.Default({});
}
Expand All @@ -597,21 +597,40 @@ static std::size_t getHostSideElementSize(Type eleTy,
}

/// Recursively builds an `ArrayAttr` containing the constants.
///
/// Set \p boolVecBitPacked when an `i1` vector arg is a host
/// `std::vector<bool>` (bit-packed; not the `{begin, end, capacity}` triple).
ArrayAttr genRecursiveConstantArray(OpBuilder &builder,
cudaq::cc::StdvecType vecTy, void *p,
llvm::DataLayout &layout) {
llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
auto eleTy = vecTy.getElementType();

// Bit-packed `std::vector<bool>`: read via the container API, not a triple.
if (boolVecBitPacked && eleTy.isInteger(1)) {
auto *boolVec = reinterpret_cast<const std::vector<bool> *>(p);
if (boolVec->empty())
return {};
auto intTy = cast<IntegerType>(eleTy);
SmallVector<Attribute> members;
members.reserve(boolVec->size());
for (bool bit : *boolVec)
members.push_back(IntegerAttr::get(intTy, bit ? 1 : 0));
return ArrayAttr::get(builder.getContext(), members);
}

typedef const char *VectorType[3];
VectorType *vecPtr = static_cast<VectorType *>(p);
auto delta = (*vecPtr)[1] - (*vecPtr)[0];
if (!delta)
return {};
auto eleTy = vecTy.getElementType();
unsigned stepBy = 0;
std::function<Attribute(char *)> genAttr;
if (auto innerTy = dyn_cast<cudaq::cc::StdvecType>(eleTy)) {
stepBy = sizeof(VectorType);
genAttr = [&, innerTy](char *p) -> Attribute {
return genRecursiveConstantArray(builder, innerTy, p, layout);
return genRecursiveConstantArray(builder, innerTy, p, layout,
boolVecBitPacked);
};
} else if (auto stringTy = dyn_cast<cudaq::cc::CharspanType>(eleTy)) {
stepBy = sizeof(std::string);
Expand Down Expand Up @@ -688,8 +707,10 @@ static Type convertRecursiveSpanType(Type ty) {
/// constant propagation through the recursive span structure. The reify
/// operation will be lowered to more primitive ops on an as-needed basis.
Value genRecursiveSpan(OpBuilder &builder, cudaq::cc::StdvecType ty, void *p,
ModuleOp substMod, llvm::DataLayout &layout) {
ArrayAttr constants = genRecursiveConstantArray(builder, ty, p, layout);
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
ArrayAttr constants =
genRecursiveConstantArray(builder, ty, p, layout, boolVecBitPacked);
auto loc = builder.getUnknownLoc();
if (!constants) {
// Empty vector. Not much to contemplate here.
Expand All @@ -705,9 +726,11 @@ Value genRecursiveSpan(OpBuilder &builder, cudaq::cc::StdvecType ty, void *p,
}

Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout) {
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
if (isSupportedRecursiveSpan(vecTy))
return genRecursiveSpan(builder, vecTy, p, substMod, layout);
return genRecursiveSpan(builder, vecTy, p, substMod, layout,
boolVecBitPacked);
typedef const char *VectorType[3];
VectorType *vecPtr = static_cast<VectorType *>(p);
auto delta = (*vecPtr)[1] - (*vecPtr)[0];
Expand All @@ -727,7 +750,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p,
for (std::int32_t i = 0; i < vecSize; ++i) {
if (Value val = dispatchSubtype(
builder, eleTy, static_cast<void *>(const_cast<char *>(cursor)),
substMod, layout)) {
substMod, layout, boolVecBitPacked)) {
auto atLoc = cudaq::cc::ComputePtrOp::create(
builder, loc, elePtrTy, buffer,
ArrayRef<cudaq::cc::ComputePtrArg>{i});
Expand All @@ -740,7 +763,8 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p,
}

Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout) {
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
if (strTy.getMembers().empty())
return {};
const char *cursor = static_cast<const char *>(p);
Expand All @@ -752,15 +776,16 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p,
builder, iter.value(),
static_cast<void *>(const_cast<char *>(
cursor + cudaq::opt::getDataOffset(layout, strTy, i))),
substMod, layout))
substMod, layout, boolVecBitPacked))
aggie =
cudaq::cc::InsertValueOp::create(builder, loc, strTy, aggie, v, i);
}
return aggie;
}

Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout) {
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
if (!p)
return {};
auto loc = builder.getUnknownLoc();
Expand All @@ -787,7 +812,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p,
if (hasLiftedArgs) {
for (unsigned i = liftedPos, j = 0; i < liftedArity; ++i, ++j) {
Value v = dispatchSubtype(builder, calleeInpTys[i], closureArgs[j],
substMod, layout);
substMod, layout, boolVecBitPacked);
assert(v && "lifted argument must be handled");
args.push_back(v);
}
Expand All @@ -806,7 +831,8 @@ Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p,
}

Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout) {
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
if (arrTy.isUnknownSize())
return {};
auto eleTy = arrTy.getElementType();
Expand All @@ -818,7 +844,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p,
for (std::size_t i = 0; i < arrSize; ++i) {
if (Value v = dispatchSubtype(
builder, eleTy, static_cast<void *>(const_cast<char *>(cursor)),
substMod, layout))
substMod, layout, boolVecBitPacked))
aggie =
cudaq::cc::InsertValueOp::create(builder, loc, arrTy, aggie, v, i);
cursor += eleSize;
Expand Down Expand Up @@ -854,8 +880,9 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy,
//===----------------------------------------------------------------------===//

cudaq_internal::compiler::ArgumentConverter::ArgumentConverter(
StringRef kernelName, ModuleOp sourceModule)
: sourceModule(sourceModule), kernelName(kernelName) {}
StringRef kernelName, ModuleOp sourceModule, bool boolVecBitPacked)
: sourceModule(sourceModule), kernelName(kernelName),
boolVecBitPacked(boolVecBitPacked) {}

void cudaq_internal::compiler::ArgumentConverter::gen(
std::span<void *const> arguments) {
Expand Down Expand Up @@ -954,19 +981,23 @@ void cudaq_internal::compiler::ArgumentConverter::gen(
return {};
})
.Case([&](cudaq::cc::StdvecType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout);
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
})
.Case([&](cudaq::cc::StructType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout);
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
})
.Case([&](cudaq::cc::ArrayType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout);
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
})
.Case([&](cudaq::cc::IndirectCallableType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout);
})
.Case([&](cudaq::cc::CallableType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout);
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
})
.Default({});
if (subst)
Expand Down
6 changes: 5 additions & 1 deletion runtime/internal/compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,11 @@ cudaq_internal::compiler::Compiler::prepareModule(const std::string &kernelName,
// For quantum devices, we generate a collection of `init` and
// `num_qubits` functions and their substitutions created
// from a kernel and arguments that generated a state argument.
cudaq_internal::compiler::ArgumentConverter argCon(kernelName, moduleOp);
// Local simulators marshal `i1` vectors as bit-packed `std::vector<bool>`
// (argsCreator); remote/emulated targets use `std::vector<char>`.
const bool boolVecBitPacked = target->isLocalSimulator;
cudaq_internal::compiler::ArgumentConverter argCon(kernelName, moduleOp,
boolVecBitPacked);
// Must stay in scope as `eraseNonCallableArguments` may populate it
std::vector<void *> closureArgs;
if (cudaq::opt::marshal::isFullySynthesized(epFunc)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ class ArgumentConverter {
public:
/// Build an instance to create argument substitutions for a specified \p
/// kernelName in \p sourceModule.
ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule);
///
/// Set \p boolVecBitPacked when `i1` vector args are host `std::vector<bool>`
/// (local-simulator launch path), not `std::vector<char>`.
ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule,
bool boolVecBitPacked = false);

~ArgumentConverter() {
for (auto *kInfo : kernelSubstitutions) {
Expand Down Expand Up @@ -110,6 +114,10 @@ class ArgumentConverter {

/// Kernel we are substituting the arguments for.
mlir::StringRef kernelName;

/// Whether `i1` vector args are bit-packed `std::vector<bool>` (vs
/// `std::vector<char>`). See the constructor.
bool boolVecBitPacked;
};

/// Merge modules from any CallableClosureArgument arguments into \p intoModule.
Expand Down
21 changes: 19 additions & 2 deletions runtime/test/test_argument_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ void dumpSubstitutionModules(ArgumentConverter &con) {

void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName,
std::vector<void *> args,
const std::string &additionalCode = "") {
const std::string &additionalCode = "",
bool boolVecBitPacked = false) {
std::string code = additionalCode + R"#(
func.func private @callee(%0: )#" +
typeName + R"#()
Expand All @@ -166,7 +167,7 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" +
// Create the Module
auto mod = mlir::parseSourceString<mlir::ModuleOp>(code, ctx);
llvm::outs() << "Source module:\n" << *mod << '\n';
ArgumentConverter ab{"testy", *mod};
ArgumentConverter ab{"testy", *mod, boolVecBitPacked};
// Create the argument conversions
ab.gen(args);
// Dump all conversions
Expand Down Expand Up @@ -400,6 +401,22 @@ void test_vectors(mlir::MLIRContext *ctx) {
// CHECK: }
// clang-format on

{
// Real bit-packed `std::vector<bool>`, as the local-simulator launch path
// passes it. Reading this as a `{begin, end, capacity}` triple corrupts the
// heap; `boolVecBitPacked` selects the correct reader.
std::vector<bool> x = {true, false, true, true};
std::vector<void *> v = {static_cast<void *>(&x)};
doSimpleTest(ctx, "!cc.stdvec<i1>", v, /*additionalCode=*/"",
/*boolVecBitPacked=*/true);
}
// clang-format off
// CHECK-LABEL: cc.arg_subst[0] {
// CHECK: %[[VAL_0:.*]] = cc.const_array [true, false, true, true] : !cc.array<i1 x ?>
// CHECK: %[[VAL_1:.*]] = cc.reify_span %[[VAL_0]] : (!cc.array<i1 x ?>) -> !cc.stdvec<i1>
// CHECK: }
// clang-format on

{
std::vector<std::vector<cudaq::pauli_word>> x = {
{cudaq::pauli_word{"XX"}, cudaq::pauli_word{"XY"}},
Expand Down
Loading