Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp | 603
1 file changed, 517 insertions(+), 86 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f9871b233149..09fd3087b494 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -20,6 +20,7 @@
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
@@ -30,6 +31,17 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -45,7 +57,8 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
return std::min(High, std::max(Low, Value));
}
-static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) {
+static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
+ Align AlignmentInBytes) {
ConstantInt *Byte;
switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
case LangOptions::TrivialAutoVarInitKind::Uninitialized:
@@ -347,6 +360,58 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
}
// Emit a simple mangled intrinsic that has 1 argument and a return type
+// matching the argument type. Depending on mode, this may be a constrained
+// floating-point intrinsic.
+static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E, unsigned IntrinsicID,
+ unsigned ConstrainedIntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+
+ if (CGF.Builder.getIsFPConstrained()) {
+ Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
+ return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
+ } else {
+ Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, Src0);
+ }
+}
+
+// Emit an intrinsic that has 2 operands of the same type as its result.
+// Depending on mode, this may be a constrained floating-point intrinsic.
+static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E, unsigned IntrinsicID,
+ unsigned ConstrainedIntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+ if (CGF.Builder.getIsFPConstrained()) {
+ Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
+ return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
+ } else {
+ Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, { Src0, Src1 });
+ }
+}
+
+// Emit an intrinsic that has 3 operands of the same type as its result.
+// Depending on mode, this may be a constrained floating-point intrinsic.
+static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E, unsigned IntrinsicID,
+ unsigned ConstrainedIntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
+
+ if (CGF.Builder.getIsFPConstrained()) {
+ Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
+ return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
+ } else {
+ Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
+ }
+}
+
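All three helpers above make the same decision. At the source level it looks like this (an illustration, not part of the patch; it assumes the -ffp-exception-behavior=strict driver flag is what puts the IRBuilder into FP-constrained mode):

    // One source form, two possible lowerings:
    //   default FP semantics: call double @llvm.sqrt.f64(double %x)
    //   strict FP semantics:  call double @llvm.experimental.constrained.sqrt.f64(...)
    double root(double x) { return __builtin_sqrt(x); }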
+// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
@@ -392,15 +457,22 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
}
// Emit an intrinsic that has overloaded integer result and fp operand.
-static Value *emitFPToIntRoundBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+static Value *
+emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+ unsigned IntrinsicID,
+ unsigned ConstrainedIntrinsicID) {
+ llvm::Type *ResultType = CGF.ConvertType(E->getType());
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- Function *F = CGF.CGM.getIntrinsic(IntrinsicID,
- {ResultType, Src0->getType()});
- return CGF.Builder.CreateCall(F, Src0);
+ if (CGF.Builder.getIsFPConstrained()) {
+ Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
+ {ResultType, Src0->getType()});
+ return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
+ } else {
+ Function *F =
+ CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
+ return CGF.Builder.CreateCall(F, Src0);
+ }
}
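For the FP-to-int rounding builtins the intrinsic is mangled on both the result and operand types, which is why getIntrinsic receives {ResultType, Src0->getType()}. A caller-side sketch (assuming an LP64 target where long is i64):

    // Selects llvm.lround.i64.f64, or llvm.experimental.constrained.lround
    // in FP-constrained mode; the two overload types vary independently.
    long nearest(double x) { return __builtin_lround(x); }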
/// EmitFAbs - Emit a call to @llvm.fabs().
@@ -749,8 +821,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
// X86 has special BT, BTC, BTR, and BTS instructions that handle the array
// indexing operation internally. Use them if possible.
- llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
- if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+ if (CGF.getTarget().getTriple().isX86())
return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
// Otherwise, use generic code to load one byte and test the bit. Use all but
@@ -1558,7 +1629,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_ceilf:
case Builtin::BI__builtin_ceilf16:
case Builtin::BI__builtin_ceill:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::ceil,
+ Intrinsic::experimental_constrained_ceil));
case Builtin::BIcopysign:
case Builtin::BIcopysignf:
@@ -1577,7 +1650,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_cosf:
case Builtin::BI__builtin_cosf16:
case Builtin::BI__builtin_cosl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::cos,
+ Intrinsic::experimental_constrained_cos));
case Builtin::BIexp:
case Builtin::BIexpf:
@@ -1586,7 +1661,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_expf:
case Builtin::BI__builtin_expf16:
case Builtin::BI__builtin_expl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::exp,
+ Intrinsic::experimental_constrained_exp));
case Builtin::BIexp2:
case Builtin::BIexp2f:
@@ -1595,7 +1672,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_exp2f:
case Builtin::BI__builtin_exp2f16:
case Builtin::BI__builtin_exp2l:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::exp2,
+ Intrinsic::experimental_constrained_exp2));
case Builtin::BIfabs:
case Builtin::BIfabsf:
@@ -1614,7 +1693,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_floorf:
case Builtin::BI__builtin_floorf16:
case Builtin::BI__builtin_floorl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::floor,
+ Intrinsic::experimental_constrained_floor));
case Builtin::BIfma:
case Builtin::BIfmaf:
@@ -1623,7 +1704,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmaf:
case Builtin::BI__builtin_fmaf16:
case Builtin::BI__builtin_fmal:
- return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
+ return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::fma,
+ Intrinsic::experimental_constrained_fma));
case Builtin::BIfmax:
case Builtin::BIfmaxf:
@@ -1632,7 +1715,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmaxf:
case Builtin::BI__builtin_fmaxf16:
case Builtin::BI__builtin_fmaxl:
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::maxnum,
+ Intrinsic::experimental_constrained_maxnum));
case Builtin::BIfmin:
case Builtin::BIfminf:
@@ -1641,7 +1726,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fminf:
case Builtin::BI__builtin_fminf16:
case Builtin::BI__builtin_fminl:
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::minnum,
+ Intrinsic::experimental_constrained_minnum));
// fmod() is a special case. It maps to the frem instruction rather than an
// LLVM intrinsic.
@@ -1664,7 +1751,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_logf:
case Builtin::BI__builtin_logf16:
case Builtin::BI__builtin_logl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::log,
+ Intrinsic::experimental_constrained_log));
case Builtin::BIlog10:
case Builtin::BIlog10f:
@@ -1673,7 +1762,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_log10f:
case Builtin::BI__builtin_log10f16:
case Builtin::BI__builtin_log10l:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::log10,
+ Intrinsic::experimental_constrained_log10));
case Builtin::BIlog2:
case Builtin::BIlog2f:
@@ -1682,7 +1773,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_log2f:
case Builtin::BI__builtin_log2f16:
case Builtin::BI__builtin_log2l:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::log2,
+ Intrinsic::experimental_constrained_log2));
case Builtin::BInearbyint:
case Builtin::BInearbyintf:
@@ -1690,7 +1783,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_nearbyint:
case Builtin::BI__builtin_nearbyintf:
case Builtin::BI__builtin_nearbyintl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::nearbyint,
+ Intrinsic::experimental_constrained_nearbyint));
case Builtin::BIpow:
case Builtin::BIpowf:
@@ -1699,7 +1794,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_powf:
case Builtin::BI__builtin_powf16:
case Builtin::BI__builtin_powl:
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::pow,
+ Intrinsic::experimental_constrained_pow));
case Builtin::BIrint:
case Builtin::BIrintf:
@@ -1708,7 +1805,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_rintf:
case Builtin::BI__builtin_rintf16:
case Builtin::BI__builtin_rintl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::rint,
+ Intrinsic::experimental_constrained_rint));
case Builtin::BIround:
case Builtin::BIroundf:
@@ -1717,7 +1816,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_roundf:
case Builtin::BI__builtin_roundf16:
case Builtin::BI__builtin_roundl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::round,
+ Intrinsic::experimental_constrained_round));
case Builtin::BIsin:
case Builtin::BIsinf:
@@ -1726,7 +1827,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sinf:
case Builtin::BI__builtin_sinf16:
case Builtin::BI__builtin_sinl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::sin,
+ Intrinsic::experimental_constrained_sin));
case Builtin::BIsqrt:
case Builtin::BIsqrtf:
@@ -1735,7 +1838,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sqrtf:
case Builtin::BI__builtin_sqrtf16:
case Builtin::BI__builtin_sqrtl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::sqrt,
+ Intrinsic::experimental_constrained_sqrt));
case Builtin::BItrunc:
case Builtin::BItruncf:
@@ -1744,7 +1849,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_truncf:
case Builtin::BI__builtin_truncf16:
case Builtin::BI__builtin_truncl:
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
+ Intrinsic::trunc,
+ Intrinsic::experimental_constrained_trunc));
case Builtin::BIlround:
case Builtin::BIlroundf:
@@ -1752,7 +1859,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_lround:
case Builtin::BI__builtin_lroundf:
case Builtin::BI__builtin_lroundl:
- return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lround));
+ return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
+ *this, E, Intrinsic::lround,
+ Intrinsic::experimental_constrained_lround));
case Builtin::BIllround:
case Builtin::BIllroundf:
@@ -1760,7 +1869,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_llround:
case Builtin::BI__builtin_llroundf:
case Builtin::BI__builtin_llroundl:
- return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround));
+ return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
+ *this, E, Intrinsic::llround,
+ Intrinsic::experimental_constrained_llround));
case Builtin::BIlrint:
case Builtin::BIlrintf:
@@ -1768,7 +1879,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_lrint:
case Builtin::BI__builtin_lrintf:
case Builtin::BI__builtin_lrintl:
- return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint));
+ return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
+ *this, E, Intrinsic::lrint,
+ Intrinsic::experimental_constrained_lrint));
case Builtin::BIllrint:
case Builtin::BIllrintf:
@@ -1776,7 +1889,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_llrint:
case Builtin::BI__builtin_llrintf:
case Builtin::BI__builtin_llrintl:
- return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint));
+ return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
+ *this, E, Intrinsic::llrint,
+ Intrinsic::experimental_constrained_llrint));
default:
break;
@@ -1822,16 +1937,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_conj:
case Builtin::BI__builtin_conjf:
- case Builtin::BI__builtin_conjl: {
+ case Builtin::BI__builtin_conjl:
+ case Builtin::BIconj:
+ case Builtin::BIconjf:
+ case Builtin::BIconjl: {
ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
Value *Real = ComplexVal.first;
Value *Imag = ComplexVal.second;
- Value *Zero =
- Imag->getType()->isFPOrFPVectorTy()
- ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
- : llvm::Constant::getNullValue(Imag->getType());
-
- Imag = Builder.CreateFSub(Zero, Imag, "sub");
+ Imag = Builder.CreateFNeg(Imag, "neg");
return RValue::getComplex(std::make_pair(Real, Imag));
}
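The switch from CreateFSub against a negation zero to CreateFNeg matters because IEEE-754 negation is a pure sign-bit flip rather than an arithmetic operation. A standalone check of that property (a sketch, assuming IEEE-754 binary64 doubles and that unary minus compiles to a sign-bit flip, as fneg does):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>
    int main() {
      double q = std::nan("");   // quiet NaN
      double n = -q;             // negation: flips the sign, keeps the payload
      uint64_t a, b;
      std::memcpy(&a, &q, 8);
      std::memcpy(&b, &n, 8);
      assert((a ^ b) == (1ull << 63)); // only the sign bit differs
    }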
case Builtin::BI__builtin_creal:
@@ -2178,13 +2291,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_powi:
case Builtin::BI__builtin_powif:
- case Builtin::BI__builtin_powil: {
- Value *Base = EmitScalarExpr(E->getArg(0));
- Value *Exponent = EmitScalarExpr(E->getArg(1));
- llvm::Type *ArgType = Base->getType();
- Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
- return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
- }
+ case Builtin::BI__builtin_powil:
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
+ *this, E, Intrinsic::powi, Intrinsic::experimental_constrained_powi));
case Builtin::BI__builtin_isgreater:
case Builtin::BI__builtin_isgreaterequal:
@@ -2358,12 +2467,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *Size = EmitScalarExpr(E->getArg(0));
const TargetInfo &TI = getContext().getTargetInfo();
// The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
- unsigned SuitableAlignmentInBytes =
+ const Align SuitableAlignmentInBytes =
CGM.getContext()
.toCharUnitsFromBits(TI.getSuitableAlign())
- .getQuantity();
+ .getAsAlign();
AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
- AI->setAlignment(MaybeAlign(SuitableAlignmentInBytes));
+ AI->setAlignment(SuitableAlignmentInBytes);
initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
return RValue::get(AI);
}
@@ -2373,10 +2482,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
- unsigned AlignmentInBytes =
- CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
+ const Align AlignmentInBytes =
+ CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
- AI->setAlignment(MaybeAlign(AlignmentInBytes));
+ AI->setAlignment(AlignmentInBytes);
initializeAlloca(*this, AI, Size, AlignmentInBytes);
return RValue::get(AI);
}
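One detail worth keeping in mind when reading this hunk: the second argument of __builtin_alloca_with_align is specified in bits, which is why the code converts with toCharUnitsFromBits before building the Align. For example:

    #include <cstring>
    void demo() {
      // 256 bits == 32 bytes: the allocation below is 32-byte aligned.
      void *buf = __builtin_alloca_with_align(64, 256);
      std::memset(buf, 0, 64);
    }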
@@ -2391,7 +2500,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(nullptr);
}
case Builtin::BImemcpy:
- case Builtin::BI__builtin_memcpy: {
+ case Builtin::BI__builtin_memcpy:
+ case Builtin::BImempcpy:
+ case Builtin::BI__builtin_mempcpy: {
Address Dest = EmitPointerWithAlignment(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *SizeVal = EmitScalarExpr(E->getArg(2));
@@ -2400,7 +2511,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
E->getArg(1)->getExprLoc(), FD, 1);
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
- return RValue::get(Dest.getPointer());
+ if (BuiltinID == Builtin::BImempcpy ||
+ BuiltinID == Builtin::BI__builtin_mempcpy)
+ return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), SizeVal));
+ else
+ return RValue::get(Dest.getPointer());
}
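mempcpy differs from memcpy only in its return value, which is exactly what the new inbounds GEP computes: one past the last byte written. A quick check using the builtin spelling, so no glibc header or _GNU_SOURCE is needed:

    #include <cassert>
    int main() {
      char dst[8];
      void *end = __builtin_mempcpy(dst, "abc", 3);
      assert(end == dst + 3); // dest + n, not dest
    }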
case Builtin::BI__builtin_char_memchr:
@@ -3367,7 +3482,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Carry);
}
case Builtin::BI__builtin_addressof:
- return RValue::get(EmitLValue(E->getArg(0)).getPointer());
+ return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
case Builtin::BI__builtin_operator_new:
return EmitBuiltinNewDeleteCall(
E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
@@ -3375,6 +3490,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return EmitBuiltinNewDeleteCall(
E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
+ case Builtin::BI__builtin_is_aligned:
+ return EmitBuiltinIsAligned(E);
+ case Builtin::BI__builtin_align_up:
+ return EmitBuiltinAlignTo(E, true);
+ case Builtin::BI__builtin_align_down:
+ return EmitBuiltinAlignTo(E, false);
+
case Builtin::BI__noop:
// __noop always evaluates to an integer literal zero.
return RValue::get(ConstantInt::get(IntTy, 0));
@@ -3750,8 +3872,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
- llvm::Value *Range = NDRangeL.getAddress().getPointer();
- llvm::Type *RangeTy = NDRangeL.getAddress().getType();
+ llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
+ llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
if (NumArgs == 4) {
// The most basic form of the call with parameters:
@@ -3770,7 +3892,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
AttrBuilder B;
- B.addByValAttr(NDRangeL.getAddress().getElementType());
+ B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
llvm::AttributeList ByValAttrSet =
llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
@@ -3955,7 +4077,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
- llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
+ llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
@@ -4217,9 +4339,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(V);
}
- // See if we have a target specific builtin that needs to be lowered.
- if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
- return RValue::get(V);
+ // Some target-specific builtins can have aggregate return values, e.g.
+ // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
+ // ReturnValue to be non-null, so that the target-specific emission code can
+ // always just emit into it.
+ TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
+ if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
+ Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
+ ReturnValue = ReturnValueSlot(DestPtr, false);
+ }
+
+ // Now see if we can emit a target-specific builtin.
+ if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
+ switch (EvalKind) {
+ case TEK_Scalar:
+ return RValue::get(V);
+ case TEK_Aggregate:
+ return RValue::getAggregate(ReturnValue.getValue(),
+ ReturnValue.isVolatile());
+ case TEK_Complex:
+ llvm_unreachable("No current target builtin returns complex");
+ }
+ llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
+ }
ErrorUnsupported(E, "builtin function");
@@ -4229,14 +4371,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
unsigned BuiltinID, const CallExpr *E,
+ ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
switch (Arch) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
- return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
+ return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
case llvm::Triple::bpfeb:
@@ -4268,15 +4412,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
}
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
assert(getContext().getAuxTargetInfo() && "Missing aux target info");
return EmitTargetArchBuiltinExpr(
this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
- getContext().getAuxTargetInfo()->getTriple().getArch());
+ ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
}
- return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
+ return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
getTarget().getTriple().getArch());
}
@@ -4451,6 +4596,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, arm_neon_vacge, 0),
NEONMAP1(vcageq_v, arm_neon_vacge, 0),
NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -4618,10 +4767,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
- NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
- NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
+ NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
+ NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
@@ -4639,8 +4788,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
- NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
@@ -4724,6 +4873,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
+ NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
@@ -5453,6 +5606,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
}
+ case NEON::BI__builtin_neon_vcvtx_f32_v: {
+ llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
+ }
case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
@@ -5668,7 +5826,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
// TODO: Currently in AArch32 mode the pointer operand comes first, whereas
// in AArch64 it comes last. We may want to stick to one or another.
- if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+ if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
+ Arch == llvm::Triple::aarch64_32) {
llvm::Type *Tys[2] = { VTy, PTy };
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
@@ -6004,6 +6163,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
+ ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
if (auto Hint = GetValueForARMHint(BuiltinID))
return Hint;
@@ -6052,6 +6212,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
}
+ if (BuiltinID == ARM::BI__builtin_arm_cls) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
+ }
+ if (BuiltinID == ARM::BI__builtin_arm_cls64) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
+ "cls");
+ }
+
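CLS counts the consecutive bits below the top bit that are equal to it, so both 0 and -1 yield width - 1. A portable scalar model of that semantics (an illustration of the instruction's behaviour, not code from the patch):

    #include <cassert>
    #include <cstdint>
    static int cls32(int32_t x) {
      uint32_t u = static_cast<uint32_t>(x), sign = u >> 31;
      int n = 0;
      for (int bit = 30; bit >= 0 && ((u >> bit) & 1) == sign; --bit)
        ++n;
      return n;
    }
    int main() { assert(cls32(0) == 31 && cls32(-1) == 31 && cls32(1) == 30); }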
if (BuiltinID == ARM::BI__clear_cache) {
assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
const FunctionDecl *FD = E->getDirectCallee();
@@ -6320,6 +6490,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
}
+ // Deal with MVE builtins
+ if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
+ return Result;
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6769,6 +6943,152 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
+template<typename Integer>
+static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
+ llvm::APSInt IntVal;
+ bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
+ assert(IsConst && "Sema should have checked this was a constant");
+ (void)IsConst;
+ return IntVal.getExtValue();
+}
+
+static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
+ llvm::Type *T, bool Unsigned) {
+ // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
+ // which finds it convenient to specify signed/unsigned as a boolean flag.
+ return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
+}
+
+static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
+ uint32_t Shift, bool Unsigned) {
+ // MVE helper function for integer shift right. This must handle signed vs
+ // unsigned, and also deal specially with the case where the shift count is
+ // equal to the lane size. In LLVM IR, an LShr with that parameter would be
+ // undefined behavior, but in MVE it's legal, so we must convert it to code
+ // that is not undefined in IR.
+ unsigned LaneBits =
+ V->getType()->getVectorElementType()->getPrimitiveSizeInBits();
+ if (Shift == LaneBits) {
+ // An unsigned shift of the full lane size always generates zero, so we can
+ // simply emit a zero vector. A signed shift of the full lane size does the
+ // same thing as shifting by one bit fewer.
+ if (Unsigned)
+ return llvm::Constant::getNullValue(V->getType());
+ else
+ --Shift;
+ }
+ return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
+}
+
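A scalar analogue of the Shift == LaneBits special case, for 16-bit lanes: the helper folds an unsigned shift by 16 to the constant 0, and rewrites a signed shift by 16 as a shift by 15, which simply smears the sign bit:

    #include <cassert>
    #include <cstdint>
    int main() {
      int16_t s = -2;
      assert((s >> 15) == -1); // the replacement emitted for the signed case
    }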
+static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
+ // MVE-specific helper function for a vector splat, which infers the element
+ // count of the output vector by knowing that MVE vectors are all 128 bits
+ // wide.
+ unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
+ return Builder.CreateVectorSplat(Elements, V);
+}
+
+Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E,
+ ReturnValueSlot ReturnValue,
+ llvm::Triple::ArchType Arch) {
+ enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
+ Intrinsic::ID IRIntr;
+ unsigned NumVectors;
+
+ // Code autogenerated by Tablegen will handle all the simple builtins.
+ switch (BuiltinID) {
+ #include "clang/Basic/arm_mve_builtin_cg.inc"
+
+ // If we didn't match an MVE builtin id at all, go back to the
+ // main EmitARMBuiltinExpr.
+ default:
+ return nullptr;
+ }
+
+ // Anything that breaks from that switch is an MVE builtin that
+ // needs handwritten code to generate.
+
+ switch (CustomCodeGenType) {
+
+ case CustomCodeGen::VLD24: {
+ llvm::SmallVector<Value *, 4> Ops;
+ llvm::SmallVector<llvm::Type *, 4> Tys;
+
+ auto MvecCType = E->getType();
+ auto MvecLType = ConvertType(MvecCType);
+ assert(MvecLType->isStructTy() &&
+ "Return type for vld[24]q should be a struct");
+ assert(MvecLType->getStructNumElements() == 1 &&
+ "Return-type struct for vld[24]q should have one element");
+ auto MvecLTypeInner = MvecLType->getStructElementType(0);
+ assert(MvecLTypeInner->isArrayTy() &&
+ "Return-type struct for vld[24]q should contain an array");
+ assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
+ "Array member of return-type struct vld[24]q has wrong length");
+ auto VecLType = MvecLTypeInner->getArrayElementType();
+
+ Tys.push_back(VecLType);
+
+ auto Addr = E->getArg(0);
+ Ops.push_back(EmitScalarExpr(Addr));
+ Tys.push_back(ConvertType(Addr->getType()));
+
+ Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+ Value *LoadResult = Builder.CreateCall(F, Ops);
+ Value *MvecOut = UndefValue::get(MvecLType);
+ for (unsigned i = 0; i < NumVectors; ++i) {
+ Value *Vec = Builder.CreateExtractValue(LoadResult, i);
+ MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
+ }
+
+ if (ReturnValue.isNull())
+ return MvecOut;
+ else
+ return Builder.CreateStore(MvecOut, ReturnValue.getValue());
+ }
+
+ case CustomCodeGen::VST24: {
+ llvm::SmallVector<Value *, 4> Ops;
+ llvm::SmallVector<llvm::Type *, 4> Tys;
+
+ auto Addr = E->getArg(0);
+ Ops.push_back(EmitScalarExpr(Addr));
+ Tys.push_back(ConvertType(Addr->getType()));
+
+ auto MvecCType = E->getArg(1)->getType();
+ auto MvecLType = ConvertType(MvecCType);
+ assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
+ assert(MvecLType->getStructNumElements() == 1 &&
+ "Data-type struct for vst2q should have one element");
+ auto MvecLTypeInner = MvecLType->getStructElementType(0);
+ assert(MvecLTypeInner->isArrayTy() &&
+ "Data-type struct for vst2q should contain an array");
+ assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
+ "Array member of return-type struct vld[24]q has wrong length");
+ auto VecLType = MvecLTypeInner->getArrayElementType();
+
+ Tys.push_back(VecLType);
+
+ AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
+ EmitAggExpr(E->getArg(1), MvecSlot);
+ auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
+ for (unsigned i = 0; i < NumVectors; i++)
+ Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
+
+ Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
+ Value *ToReturn = nullptr;
+ for (unsigned i = 0; i < NumVectors; i++) {
+ Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
+ ToReturn = Builder.CreateCall(F, Ops);
+ Ops.pop_back();
+ }
+ return ToReturn;
+ }
+ }
+ llvm_unreachable("unknown custom codegen type.");
+}
+
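For orientation, this is the source-level shape the VLD24 path serves (a sketch; it assumes an MVE-enabled target such as -march=armv8.1-m.main+mve and the suffixed spelling from <arm_mve.h>): vld2q deinterleaves memory into a struct of two vectors, i.e. exactly the aggregate return value that the new ReturnValueSlot plumbing carries back to the caller.

    #include <arm_mve.h>
    uint32x4x2_t load_pair(const uint32_t *p) {
      return vld2q_u32(p); // struct-of-two-vectors return -> TEK_Aggregate
    }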
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
const CallExpr *E,
SmallVectorImpl<Value *> &Ops,
@@ -7000,6 +7320,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
}
+ if (BuiltinID == AArch64::BI__builtin_arm_cls) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
+ "cls");
+ }
+ if (BuiltinID == AArch64::BI__builtin_arm_cls64) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
+ "cls");
+ }
+
if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {
assert((getContext().getTypeSize(E->getType()) == 32) &&
"__jcvt of unusual size!");
@@ -9311,14 +9642,14 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
if (!getDebugInfo()) {
CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g");
return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
- : EmitLValue(Arg).getPointer();
+ : EmitLValue(Arg).getPointer(*this);
}
// Enable underlying preserve_*_access_index() generation.
bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
IsInPreservedAIRegion = true;
Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
- : EmitLValue(Arg).getPointer();
+ : EmitLValue(Arg).getPointer(*this);
IsInPreservedAIRegion = OldIsInPreservedAIRegion;
ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
@@ -12175,7 +12506,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__stosb: {
// We treat __stosb as a volatile memset - it may not generate "rep stosb"
// instruction, but it will create a memset that won't be optimized away.
- return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
+ return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align::None(), true);
}
case X86::BI__ud2:
// llvm.trap makes a ud2a instruction on x86.
@@ -12993,9 +13324,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+ return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
}
case SystemZ::BI__builtin_s390_vfnmasb:
case SystemZ::BI__builtin_s390_vfnmadb: {
@@ -13003,9 +13333,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
+ return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
}
case SystemZ::BI__builtin_s390_vfnmssb:
case SystemZ::BI__builtin_s390_vfnmsdb: {
@@ -13013,10 +13342,9 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *Z = EmitScalarExpr(E->getArg(2));
- Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
- Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
- return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
+ Value *NegZ = Builder.CreateFNeg(Z, "neg");
+ return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
}
case SystemZ::BI__builtin_s390_vflpsb:
case SystemZ::BI__builtin_s390_vflpdb: {
@@ -13029,9 +13357,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
case SystemZ::BI__builtin_s390_vflndb: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
- Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
- return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
+ return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
}
case SystemZ::BI__builtin_s390_vfisb:
case SystemZ::BI__builtin_s390_vfidb: {
@@ -13933,6 +14260,96 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
}
}
+namespace {
+struct BuiltinAlignArgs {
+ llvm::Value *Src = nullptr;
+ llvm::Type *SrcType = nullptr;
+ llvm::Value *Alignment = nullptr;
+ llvm::Value *Mask = nullptr;
+ llvm::IntegerType *IntType = nullptr;
+
+ BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
+ QualType AstType = E->getArg(0)->getType();
+ if (AstType->isArrayType())
+ Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+ else
+ Src = CGF.EmitScalarExpr(E->getArg(0));
+ SrcType = Src->getType();
+ if (SrcType->isPointerTy()) {
+ IntType = IntegerType::get(
+ CGF.getLLVMContext(),
+ CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
+ } else {
+ assert(SrcType->isIntegerTy());
+ IntType = cast<llvm::IntegerType>(SrcType);
+ }
+ Alignment = CGF.EmitScalarExpr(E->getArg(1));
+ Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
+ auto *One = llvm::ConstantInt::get(IntType, 1);
+ Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
+ }
+};
+} // namespace
+
+/// Generate (x & (y-1)) == 0.
+RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
+ BuiltinAlignArgs Args(E, *this);
+ llvm::Value *SrcAddress = Args.Src;
+ if (Args.SrcType->isPointerTy())
+ SrcAddress =
+ Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
+ return RValue::get(Builder.CreateICmpEQ(
+ Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
+ llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
+}
+
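The emitted (x & (y-1)) == 0 test, exercised through the new builtin on an integer value:

    #include <cassert>
    #include <cstdint>
    int main() {
      uintptr_t x = 0x1040; // 64 * 0x41
      assert(__builtin_is_aligned(x, 64));
      assert(!__builtin_is_aligned(x + 4, 64));
    }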
+/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
+/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
+/// llvm.ptrmask intrinsic (with a GEP beforehand in the align_up case).
+/// TODO: actually use ptrmask once most optimization passes know about it.
+RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
+ BuiltinAlignArgs Args(E, *this);
+ llvm::Value *SrcAddr = Args.Src;
+ if (Args.Src->getType()->isPointerTy())
+ SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
+ llvm::Value *SrcForMask = SrcAddr;
+ if (AlignUp) {
+ // When aligning up we have to first add the mask to ensure we go over the
+ // next alignment value and then align down to the next valid multiple.
+ // By adding the mask, we ensure that align_up on an already aligned
+ // value will not change the value.
+ SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+ }
+ // Invert the mask to only clear the lower bits.
+ llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
+ llvm::Value *Result =
+ Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+ if (Args.Src->getType()->isPointerTy()) {
+ /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
+ // Result = Builder.CreateIntrinsic(
+ // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
+ // {SrcForMask, InvertedMask}, nullptr, "aligned_result");
+ Result->setName("aligned_intptr");
+ llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
+ // The result must point to the same underlying allocation. This means we
+ // can use an inbounds GEP to enable better optimization.
+ Value *Base = EmitCastToVoidPtr(Args.Src);
+ if (getLangOpts().isSignedOverflowDefined())
+ Result = Builder.CreateGEP(Base, Difference, "aligned_result");
+ else
+ Result = EmitCheckedInBoundsGEP(Base, Difference,
+ /*SignedIndices=*/true,
+ /*isSubtraction=*/!AlignUp,
+ E->getExprLoc(), "aligned_result");
+ Result = Builder.CreatePointerCast(Result, Args.SrcType);
+ // Emit an alignment assumption to ensure that the new alignment is
+ // propagated to loads/stores, etc.
+ EmitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
+ }
+ assert(Result->getType() == Args.SrcType);
+ return RValue::get(Result);
+}
+
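Both masks in action through the new builtins; note that adding Mask before masking is what makes align_up a no-op on an already-aligned value:

    #include <cassert>
    #include <cstdint>
    int main() {
      uintptr_t x = 37;
      assert(__builtin_align_down(x, 16) == 32);           // x & ~15
      assert(__builtin_align_up(x, 16) == 48);             // (x + 15) & ~15
      assert(__builtin_align_up(uintptr_t{48}, 16) == 48); // already aligned
    }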
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
@@ -14181,6 +14598,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
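avgr_u is WebAssembly's lane-wise unsigned rounding average, (a + b + 1) >> 1 computed in widened arithmetic so the sum cannot wrap. A scalar model for one i8 lane:

    #include <cassert>
    #include <cstdint>
    int main() {
      auto avgr_u8 = [](uint8_t a, uint8_t b) {
        return static_cast<uint8_t>((unsigned(a) + b + 1) >> 1);
      };
      assert(avgr_u8(1, 2) == 2);       // 1.5 rounds up
      assert(avgr_u8(255, 255) == 255); // no wrap at the top of the range
    }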
case WebAssembly::BI__builtin_wasm_bitselect: {
Value *V1 = EmitScalarExpr(E->getArg(0));
Value *V2 = EmitScalarExpr(E->getArg(1));
@@ -14189,6 +14614,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
ConvertType(E->getType()));
return Builder.CreateCall(Callee, {V1, V2, C});
}
+ case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
case WebAssembly::BI__builtin_wasm_any_true_i8x16:
case WebAssembly::BI__builtin_wasm_any_true_i16x8:
case WebAssembly::BI__builtin_wasm_any_true_i32x4: