diff --git a/gen/pragma.cpp b/gen/pragma.cpp
index 8134bb5d97..347f0047fd 100644
--- a/gen/pragma.cpp
+++ b/gen/pragma.cpp
@@ -134,6 +134,7 @@ LDCPragma DtoGetPragma(Scope *sc, PragmaDeclaration *decl,
       {"bitop.bt", LLVMbitop_bt},   {"bitop.btc", LLVMbitop_btc},
       {"bitop.btr", LLVMbitop_btr}, {"bitop.bts", LLVMbitop_bts},
       {"bitop.vld", LLVMbitop_vld}, {"bitop.vst", LLVMbitop_vst},
+      {"convertvector", LLVMconvertvector},
   };
 
   static std::string prefix = "ldc.";
@@ -414,7 +415,8 @@ void DtoCheckPragma(PragmaDeclaration *decl, Dsymbol *s,
     break;
   }
 
-  case LLVMatomic_rmw: {
+  case LLVMatomic_rmw:
+  case LLVMconvertvector: {
     const int count = applyTemplatePragma(s, [=](TemplateDeclaration *td) {
       td->llvmInternal = llvm_internal;
       td->intrinsicName = arg1str;
@@ -595,6 +597,7 @@ bool DtoIsMagicIntrinsic(FuncDeclaration *fd) {
   case LLVMbitop_bts:
   case LLVMbitop_vld:
   case LLVMbitop_vst:
+  case LLVMconvertvector:
     return true;
 
   default:
diff --git a/gen/pragma.h b/gen/pragma.h
index 54606165e8..2b831b350a 100644
--- a/gen/pragma.h
+++ b/gen/pragma.h
@@ -47,6 +47,7 @@ enum LDCPragma {
   LLVMbitop_bts,
   LLVMbitop_vld,
   LLVMbitop_vst,
+  LLVMconvertvector,
   LLVMextern_weak,
   LLVMprofile_instr
 };
diff --git a/gen/tocall.cpp b/gen/tocall.cpp
index c1b0bc1b3c..f2aa849dcc 100644
--- a/gen/tocall.cpp
+++ b/gen/tocall.cpp
@@ -627,6 +627,88 @@ bool DtoLowerMagicIntrinsic(IRState *p, FuncDeclaration *fndecl, CallExp *e,
     return true;
   }
 
+  if (fndecl->llvmInternal == LLVMconvertvector) {
+    // Lowers `ldc.convertvector` to a single element-wise LLVM cast
+    // instruction selected from the source/destination element types.
+    if (e->arguments->length != 1) {
+      error(e->loc, "`convertvector` intrinsic expects 1 argument");
+      fatal();
+    }
+
+    Expression *exp1 = (*e->arguments)[0];
+
+    // Reject non-vector operands with a diagnostic before any LLVM-level
+    // cast<> below; an assert would vanish in release builds of the compiler.
+    Type *fromDType = exp1->type->toBasetype();
+    Type *toDType = e->type->toBasetype();
+    if (fromDType->ty != TY::Tvector || toDType->ty != TY::Tvector) {
+      error(e->loc,
+            "`convertvector` intrinsic expects vector types, not `%s` and `%s`",
+            exp1->type->toChars(), e->type->toChars());
+      fatal();
+    }
+
+    LLValue *srcVal = DtoRVal(exp1);
+    LLType *toLLType = DtoType(e->type);
+
+    auto *fromVecTy = llvm::cast<llvm::FixedVectorType>(srcVal->getType());
+    auto *toVecTy = llvm::cast<llvm::FixedVectorType>(toLLType);
+    // LLVM's vector casts require equal lane counts.
+    if (fromVecTy->getNumElements() != toVecTy->getNumElements()) {
+      error(e->loc,
+            "`convertvector` intrinsic requires vectors of equal length, not "
+            "`%s` and `%s`",
+            exp1->type->toChars(), e->type->toChars());
+      fatal();
+    }
+    LLType *fromElemTy = fromVecTy->getElementType();
+    LLType *toElemTy = toVecTy->getElementType();
+
+    // Signedness is taken from the D element types; it selects between the
+    // signed and unsigned flavours of the cast instructions below.
+    Type *fromElemDType = static_cast<TypeVector *>(fromDType)->elementType();
+    Type *toElemDType = static_cast<TypeVector *>(toDType)->elementType();
+    bool fromUnsigned = fromElemDType->isUnsigned();
+    bool toUnsigned = toElemDType->isUnsigned();
+
+    unsigned fromBits = fromElemTy->getPrimitiveSizeInBits();
+    unsigned toBits = toElemTy->getPrimitiveSizeInBits();
+
+    LLValue *val = nullptr;
+    if (fromElemTy->isIntegerTy() && toElemTy->isIntegerTy()) {
+      if (fromBits < toBits) {
+        val = fromUnsigned
+          ? p->ir->CreateZExt(srcVal, toLLType)
+          : p->ir->CreateSExt(srcVal, toLLType);
+      } else if (fromBits > toBits) {
+        val = p->ir->CreateTrunc(srcVal, toLLType);
+      } else {
+        // Same element width: no instruction is emitted, the value is reused.
+        val = srcVal;
+      }
+    } else if (fromElemTy->isIntegerTy() && toElemTy->isFloatingPointTy()) {
+      val = fromUnsigned ? p->ir->CreateUIToFP(srcVal, toLLType)
+                         : p->ir->CreateSIToFP(srcVal, toLLType);
+    } else if (fromElemTy->isFloatingPointTy() && toElemTy->isIntegerTy()) {
+      val = toUnsigned ? p->ir->CreateFPToUI(srcVal, toLLType)
+                       : p->ir->CreateFPToSI(srcVal, toLLType);
+    } else if (fromElemTy->isFloatingPointTy() && toElemTy->isFloatingPointTy()) {
+      if (fromBits < toBits) {
+        val = p->ir->CreateFPExt(srcVal, toLLType);
+      } else if (fromBits > toBits) {
+        val = p->ir->CreateFPTrunc(srcVal, toLLType);
+      } else {
+        val = srcVal;
+      }
+    } else {
+      error(e->loc, "unsupported vector element conversion from `%s` to `%s`",
+            exp1->type->toChars(), e->type->toChars());
+      fatal();
+    }
+
+    result = new DImValue(e->type, val);
+    return true;
+  }
+
   return false;
 }
 
@@ -734,7 +816,7 @@ class ImplicitArgumentsBuilder {
       }
       args.push_back(thisptrLval);
     } else if (thiscall && dfnval && dfnval->vthis) {
-      
+
       if (objccall && directcall) {
 
         // ... or a Objective-c direct call argument
diff --git a/runtime/druntime/src/ldc/intrinsics.di b/runtime/druntime/src/ldc/intrinsics.di
index 32268ff24c..74f2eda256 100644
--- a/runtime/druntime/src/ldc/intrinsics.di
+++ b/runtime/druntime/src/ldc/intrinsics.di
@@ -696,6 +696,22 @@ pragma(LDC_intrinsic, "llvm.assume")
 pragma(LDC_intrinsic, "llvm.sideeffect")
     void llvm_sideeffect();
 
+/// Performs element-wise type conversion between two vector types with the
+/// same number of elements. The source and destination vectors must have the
+/// same element count but may differ in element type and element size.
+/// The conversion follows standard D conversion rules:
+/// - Integer-to-integer: sext (signed) or zext (unsigned) for widening, trunc for narrowing
+/// - Integer-to-float: sitofp (signed) or uitofp (unsigned)
+/// - Float-to-integer: fptosi (signed dest) or fptoui (unsigned dest)
+/// - Float-to-float: fpext for widening, fptrunc for narrowing
+///
+/// This is the equivalent of GDC/Clang's `__builtin_convertvector`.
+pragma(LDC_intrinsic, "ldc.convertvector")
+    To llvm_convertvector(To, From)(From val)
+        if (is(From : __vector(V[N]), V, size_t N) &&
+            is(To : __vector(U[M]), U, size_t M) &&
+            N == M);
+
 version (WebAssembly)
 {
     /// Grows memory by a given delta and returns the previous size, or -1 if enough
diff --git a/tests/codegen/llvm_convertvector_ir.d b/tests/codegen/llvm_convertvector_ir.d
new file mode 100644
index 0000000000..d6cc4261b0
--- /dev/null
+++ b/tests/codegen/llvm_convertvector_ir.d
@@ -0,0 +1,65 @@
+// RUN: %ldc -output-ll -of=%t.ll %s && FileCheck %s < %t.ll
+
+import core.simd;
+import ldc.intrinsics;
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_fptrunc
+// CHECK: fptrunc <2 x double> {{.*}} to <2 x float>
+float2 test_fptrunc(double2 v) {
+    return llvm_convertvector!(float2)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_fpext
+// CHECK: fpext <2 x float> {{.*}} to <2 x double>
+double2 test_fpext(float2 v) {
+    return llvm_convertvector!(double2)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_sext
+// CHECK: sext <4 x i16> {{.*}} to <4 x i32>
+int4 test_sext(short4 v) {
+    return llvm_convertvector!(int4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_zext
+// CHECK: zext <4 x i16> {{.*}} to <4 x i32>
+uint4 test_zext(ushort4 v) {
+    return llvm_convertvector!(uint4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_trunc
+// CHECK: trunc <4 x i64> {{.*}} to <4 x i32>
+int4 test_trunc(long4 v) {
+    return llvm_convertvector!(int4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_sitofp
+// CHECK: sitofp <4 x i32> {{.*}} to <4 x float>
+float4 test_sitofp(int4 v) {
+    return llvm_convertvector!(float4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_uitofp
+// CHECK: uitofp <4 x i32> {{.*}} to <4 x float>
+float4 test_uitofp(uint4 v) {
+    return llvm_convertvector!(float4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_fptosi
+// CHECK: fptosi <4 x float> {{.*}} to <4 x i32>
+int4 test_fptosi(float4 v) {
+    return llvm_convertvector!(int4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_fptoui
+// CHECK: fptoui <4 x float> {{.*}} to <4 x i32>
+uint4 test_fptoui(float4 v) {
+    return llvm_convertvector!(uint4)(v);
+}
+
+// CHECK-LABEL: define {{.*}}@{{.*}}test_same_width_int
+// Same-width int-to-int should be a no-op (no conversion instruction, same type)
+// CHECK-NOT: {{(sext|zext|trunc|sitofp|uitofp|fptosi|fptoui|fptrunc|fpext)}}
+int4 test_same_width_int(int4 v) {
+    return llvm_convertvector!(int4)(v);
+}
diff --git a/tests/codegen/test_llvm_convertvector.d b/tests/codegen/test_llvm_convertvector.d
new file mode 100644
index 0000000000..17ac564c3f
--- /dev/null
+++ b/tests/codegen/test_llvm_convertvector.d
@@ -0,0 +1,52 @@
+// RUN: %ldc -run %s
+
+import core.simd;
+import ldc.intrinsics;
+
+void main()
+{
+    // Float-to-float: narrowing (double -> float)
+    const double2 d2 = [ 1.5, -2.5 ];
+    const f2_narrow = llvm_convertvector!(float2)(d2);
+    assert(f2_narrow is [ 1.5f, -2.5f ]);
+
+    // Float-to-float: widening (float -> double)
+    const float2 f2 = [ 3.25f, -1.75f ];
+    const d2_widen = llvm_convertvector!(double2)(f2);
+    assert(d2_widen is [ 3.25, -1.75 ]);
+
+    // Int-to-int: widening signed (short -> int)
+    const short4 s4 = [ 1, -2, 3, -4 ];
+    const i4_widen = llvm_convertvector!(int4)(s4);
+    assert(i4_widen is [ 1, -2, 3, -4 ]);
+
+    // Int-to-int: widening unsigned (ushort -> uint); 65535 has the top bit set, so a sext would be caught
+    const ushort4 us4 = [ 1, 2, 3, 65535 ];
+    const ui4_widen = llvm_convertvector!(uint4)(us4);
+    assert(ui4_widen is [ 1, 2, 3, 65535 ]);
+
+    // Int-to-int: narrowing (long -> int)
+    const long4 l4 = [ 1000000L, -2000000L, 3000000L, -4000000L ];
+    const i4_narrow = llvm_convertvector!(int4)(l4);
+    assert(i4_narrow is [ 1000000, -2000000, 3000000, -4000000 ]);
+
+    // Int-to-float: signed
+    const int4 i4 = [ 1, -2, 3, -4 ];
+    const f4_from_i4 = llvm_convertvector!(float4)(i4);
+    assert(f4_from_i4 is [ 1.0f, -2.0f, 3.0f, -4.0f ]);
+
+    // Int-to-float: unsigned; 0x8000_0000 (2^31) would turn negative under sitofp
+    const uint4 ui4 = [ 1, 2, 3, 0x8000_0000 ];
+    const f4_from_ui4 = llvm_convertvector!(float4)(ui4);
+    assert(f4_from_ui4 is [ 1.0f, 2.0f, 3.0f, 2147483648.0f ]);
+
+    // Float-to-int: signed dest
+    const float4 f4 = [ 1.5f, -2.7f, 3.2f, -4.9f ];
+    const i4_from_f4 = llvm_convertvector!(int4)(f4);
+    assert(i4_from_f4 is [ 1, -2, 3, -4 ]);
+
+    // Float-to-int: unsigned dest; 3e9 is exactly representable and exceeds int.max, so fptosi would be caught
+    const float4 f4p = [ 1.5f, 2.7f, 3.2f, 3e9f ];
+    const ui4_from_f4 = llvm_convertvector!(uint4)(f4p);
+    assert(ui4_from_f4 is [ 1, 2, 3, 3_000_000_000u ]);
+}