Diffstat (limited to 'lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp  261
1 file changed, 204 insertions(+), 57 deletions(-)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index b3d02f1f51c6..6ea0a325a429 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -420,10 +420,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
 llvm::Value *
 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
-                                                 llvm::IntegerType *ResType) {
+                                                 llvm::IntegerType *ResType,
+                                                 llvm::Value *EmittedE) {
   uint64_t ObjectSize;
   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
-    return emitBuiltinObjectSize(E, Type, ResType);
+    return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
 }
 
@@ -432,9 +433,14 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
 ///   it)
 /// - A call to the @llvm.objectsize intrinsic
+///
+/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
+/// and we wouldn't otherwise try to reference a pass_object_size parameter,
+/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
 llvm::Value *
 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
-                                       llvm::IntegerType *ResType) {
+                                       llvm::IntegerType *ResType,
+                                       llvm::Value *EmittedE) {
   // We need to reference an argument if the pointer is a parameter with the
   // pass_object_size attribute.
   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
@@ -457,16 +463,20 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
   // evaluate E for side-effects. In either case, we shouldn't lower to
   // @llvm.objectsize.
-  if (Type == 3 || E->HasSideEffects(getContext()))
+  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
     return getDefaultBuiltinObjectSizeResult(Type, ResType);
 
-  // LLVM only supports 0 and 2, make sure that we pass along that
-  // as a boolean.
-  auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
-  // FIXME: Get right address space.
-  llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
-  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
-  return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
+  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
+  assert(Ptr->getType()->isPointerTy() &&
+         "Non-pointer passed to __builtin_object_size?");
+
+  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
+
+  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
+  Value *Min = Builder.getInt1((Type & 2) != 0);
+  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
+  Value *NullIsUnknown = Builder.getTrue();
+  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
 }
 
 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
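Aside (not part of the diff): with the NullIsUnknown operand pinned to true, a plain use of the builtin now lowers to the three-operand form of the intrinsic. A minimal sketch; the IR shown in comments is an assumption about the emitted form, not verbatim compiler output:

    #include <cstddef>

    size_t remaining(char *buf) {
      // Type 0 asks for the maximum object size, so Min is i1 false.
      return __builtin_object_size(buf, 0);
      // Expected lowering, roughly:
      //   %n = call i64 @llvm.objectsize.i64.p0i8(i8* %buf,
      //                                           i1 false,  ; Min
      //                                           i1 true)   ; NullIsUnknown
    }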
@@ -482,10 +492,12 @@ enum class CodeGenFunction::MSVCIntrin {
   _InterlockedIncrement,
   _InterlockedOr,
   _InterlockedXor,
+  _interlockedbittestandset,
+  __fastfail,
 };
 
 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
-                                           const CallExpr *E) {
+                                            const CallExpr *E) {
   switch (BuiltinID) {
   case MSVCIntrin::_BitScanForward:
   case MSVCIntrin::_BitScanReverse: {
@@ -548,6 +560,22 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
   case MSVCIntrin::_InterlockedXor:
     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
 
+  case MSVCIntrin::_interlockedbittestandset: {
+    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
+    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+        AtomicRMWInst::Or, Addr,
+        Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    // Shift the relevant bit to the least significant position, truncate to
+    // the result type, and test the low bit.
+    llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
+    llvm::Value *Truncated =
+        Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
+    return Builder.CreateAnd(Truncated,
+                             ConstantInt::get(Truncated->getType(), 1));
+  }
+
   case MSVCIntrin::_InterlockedDecrement: {
     llvm::Type *IntTy = ConvertType(E->getType());
     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
@@ -566,6 +594,37 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
         llvm::AtomicOrdering::SequentiallyConsistent);
     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
   }
+
+  case MSVCIntrin::__fastfail: {
+    // Request immediate process termination from the kernel. The instruction
+    // sequences to do this are documented on MSDN:
+    // https://msdn.microsoft.com/en-us/library/dn774154.aspx
+    llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
+    StringRef Asm, Constraints;
+    switch (ISA) {
+    default:
+      ErrorUnsupported(E, "__fastfail call for this architecture");
+      break;
+    case llvm::Triple::x86:
+    case llvm::Triple::x86_64:
+      Asm = "int $$0x29";
+      Constraints = "{cx}";
+      break;
+    case llvm::Triple::thumb:
+      Asm = "udf #251";
+      Constraints = "{r0}";
+      break;
+    }
+    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
+    llvm::InlineAsm *IA =
+        llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
+        getLLVMContext(), llvm::AttributeList::FunctionIndex,
+        llvm::Attribute::NoReturn);
+    CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
+    CS.setAttributes(NoReturnAttr);
+    return CS.getInstruction();
+  }
   }
   llvm_unreachable("Incorrect MSVC intrinsic!");
 }
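Aside (not part of the diff): the _interlockedbittestandset lowering above is an atomic fetch-or followed by a test of the bit's previous value. An equivalent C++ model, illustrative only:

    #include <atomic>

    // Models the IR sequence above: atomicrmw or (seq_cst), then
    // lshr + trunc + and to extract the bit's old value.
    unsigned char bts_model(std::atomic<long> *Base, long Bit) {
      long Mask = 1L << Bit;
      long Old = Base->fetch_or(Mask, std::memory_order_seq_cst);
      return static_cast<unsigned char>((Old >> Bit) & 1);
    }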
@@ -932,7 +991,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     // We pass this builtin onto the optimizer so that it can figure out the
     // object size in more complex cases.
-    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
+    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
+                                             /*EmittedE=*/nullptr));
   }
   case Builtin::BI__builtin_prefetch: {
     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
@@ -2195,16 +2255,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI_InterlockedXor16:
   case Builtin::BI_InterlockedXor:
     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
-  case Builtin::BI__readfsdword: {
-    llvm::Type *IntTy = ConvertType(E->getType());
-    Value *IntToPtr =
-        Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
-                               llvm::PointerType::get(IntTy, 257));
-    LoadInst *Load = Builder.CreateAlignedLoad(
-        IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType()));
-    Load->setVolatile(true);
-    return RValue::get(Load);
-  }
+  case Builtin::BI_interlockedbittestandset:
+    return RValue::get(
+        EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
 
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:
@@ -2218,9 +2271,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI_setjmpex: {
     if (getTarget().getTriple().isOSMSVCRT()) {
       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
-      llvm::AttributeSet ReturnsTwiceAttr =
-          AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
-                            llvm::Attribute::ReturnsTwice);
+      llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+          getLLVMContext(), llvm::AttributeList::FunctionIndex,
+          llvm::Attribute::ReturnsTwice);
       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
@@ -2238,9 +2291,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   }
   case Builtin::BI_setjmp: {
     if (getTarget().getTriple().isOSMSVCRT()) {
-      llvm::AttributeSet ReturnsTwiceAttr =
-          AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
-                            llvm::Attribute::ReturnsTwice);
+      llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
+          getLLVMContext(), llvm::AttributeList::FunctionIndex,
+          llvm::Attribute::ReturnsTwice);
       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
       llvm::CallSite CS;
@@ -2276,6 +2329,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     break;
   }
 
+  case Builtin::BI__fastfail:
+    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
+
   case Builtin::BI__builtin_coro_size: {
     auto &Context = getContext();
     auto SizeTy = Context.getSizeType();
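Aside (not part of the diff): with the BI__fastfail wiring above, a call like the one below terminates the process through the documented sequence (int 0x29 with the code in {cx} on x86/x64, udf #251 on Thumb) instead of being rejected. The constant is a stand-in for a FAST_FAIL_* code from winnt.h:

    // Hypothetical usage under clang-cl, where __fastfail is a builtin.
    extern "C" void __fastfail(unsigned int Code);

    void die_fast() {
      __fastfail(7); // assumption: 7 == FAST_FAIL_FATAL_APP_EXIT
    }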
@@ -2492,25 +2548,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     unsigned NumArgs = E->getNumArgs();
 
     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
-    llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
+    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+        getContext().getTargetAddressSpace(LangAS::opencl_generic));
 
     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
-    llvm::Value *Range = EmitScalarExpr(E->getArg(2));
+    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
+    llvm::Value *Range = NDRangeL.getAddress().getPointer();
+    llvm::Type *RangeTy = NDRangeL.getAddress().getType();
 
     if (NumArgs == 4) {
       // The most basic form of the call with parameters:
       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
       Name = "__enqueue_kernel_basic";
-      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
+      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
       llvm::FunctionType *FTy = llvm::FunctionType::get(
           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
 
-      llvm::Value *Block =
-          Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+      llvm::Value *Block = Builder.CreatePointerCast(
+          EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
 
-      return RValue::get(Builder.CreateCall(
-          CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
+      AttrBuilder B;
+      B.addAttribute(Attribute::ByVal);
+      llvm::AttributeList ByValAttrSet =
+          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
+
+      auto RTCall =
+          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
+                             {Queue, Flags, Range, Block});
+      RTCall->setAttributes(ByValAttrSet);
+      return RValue::get(RTCall);
     }
     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
 
@@ -2518,14 +2585,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     if (E->getArg(3)->getType()->isBlockPointerType()) {
       // No events passed, but has variadic arguments.
       Name = "__enqueue_kernel_vaargs";
-      llvm::Value *Block =
-          Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+      llvm::Value *Block = Builder.CreatePointerCast(
+          EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
       // Create a vector of the arguments, as well as a constant value to
       // express to the runtime the number of variadic arguments.
       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
                                          ConstantInt::get(IntTy, NumArgs - 4)};
-      std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
-                                          IntTy};
+      std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
                                          GenericVoidPtrTy, IntTy};
 
       // Each of the following arguments specifies the size of the corresponding
       // argument passed to the enqueued block.
@@ -2555,12 +2622,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
       // Convert to generic address space.
       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
-      llvm::Value *Block =
-          Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
+      llvm::Value *Block = Builder.CreatePointerCast(
+          EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
 
-      std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy,
-                                          Int32Ty, EventPtrTy, EventPtrTy,
-                                          Int8PtrTy};
+      std::vector<llvm::Type *> ArgTys = {
+          QueueTy, Int32Ty, RangeTy, Int32Ty,
+          EventPtrTy, EventPtrTy, GenericVoidPtrTy};
       std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
                                          EventList, ClkEvent, Block};
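Aside (not part of the diff): in the basic form above, attribute index 3U addresses the third parameter (index 0 is the return value), so ByVal lands on the ndrange_t argument, which is now passed indirectly as a pointer to the aggregate. A minimal sketch of that attribute construction against the LLVM API of this era (the function name is invented):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"

    // Builds the same attribute list as the hunk above: ByVal on the
    // third parameter of __enqueue_kernel_basic (the ndrange_t).
    llvm::AttributeList byValOnNDRange(llvm::LLVMContext &Ctx) {
      llvm::AttrBuilder B;
      B.addAttribute(llvm::Attribute::ByVal);
      return llvm::AttributeList::get(Ctx, 3U, B);
    }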
@@ -2596,26 +2663,30 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
   // parameter.
   case Builtin::BIget_kernel_work_group_size: {
+    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+        getContext().getTargetAddressSpace(LangAS::opencl_generic));
     Value *Arg = EmitScalarExpr(E->getArg(0));
-    Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
-    return RValue::get(
-        Builder.CreateCall(CGM.CreateRuntimeFunction(
-                               llvm::FunctionType::get(IntTy, Int8PtrTy, false),
-                               "__get_kernel_work_group_size_impl"),
-                           Arg));
+    Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+    return RValue::get(Builder.CreateCall(
+        CGM.CreateRuntimeFunction(
+            llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+            "__get_kernel_work_group_size_impl"),
+        Arg));
   }
   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
+    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+        getContext().getTargetAddressSpace(LangAS::opencl_generic));
     Value *Arg = EmitScalarExpr(E->getArg(0));
-    Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+    Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
     return RValue::get(Builder.CreateCall(
         CGM.CreateRuntimeFunction(
-            llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+            llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
            "__get_kernel_preferred_work_group_multiple_impl"),
         Arg));
   }
   case Builtin::BIprintf:
-    if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
-      return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
+    if (getTarget().getTriple().isNVPTX())
+      return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
     break;
   case Builtin::BI__builtin_canonicalize:
   case Builtin::BI__builtin_canonicalizef:
@@ -7115,6 +7186,13 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
 }
 
+static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
+                              llvm::Type *DstTy) {
+  unsigned NumberOfElements = DstTy->getVectorNumElements();
+  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
+  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
+}
+
 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   if (BuiltinID == X86::BI__builtin_ms_va_start ||
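Aside (not part of the diff): EmitX86SExtMask, used by the cvtmask2* cases added below, turns each i1 mask bit into an all-zeros or all-ones destination element, matching vpmovm2{b,w,d,q}. A scalar C++ model of one lane, illustrative only:

    #include <cstdint>

    // sext i1 -> i8 per element: 0 -> 0x00, 1 -> 0xFF.
    int8_t sext_mask_lane(bool MaskBit) {
      return MaskBit ? int8_t(-1) : int8_t(0);
    }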
@@ -7321,7 +7399,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:
-    return UndefValue::get(ConvertType(E->getType()));
+    // The x86 definition of "undef" is not the same as the LLVM definition
+    // (PR32176). We leave optimizing away an unnecessary zero constant to the
+    // IR optimizer and backend.
+    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+    // value, we should use that here instead of a zero.
+    return llvm::Constant::getNullValue(ConvertType(E->getType()));
   case X86::BI__builtin_ia32_vec_init_v8qi:
   case X86::BI__builtin_ia32_vec_init_v4hi:
   case X86::BI__builtin_ia32_vec_init_v2si:
@@ -7408,6 +7491,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_storesd128_mask: {
     return EmitX86MaskedStore(*this, Ops, 16);
   }
+
+  case X86::BI__builtin_ia32_cvtmask2b128:
+  case X86::BI__builtin_ia32_cvtmask2b256:
+  case X86::BI__builtin_ia32_cvtmask2b512:
+  case X86::BI__builtin_ia32_cvtmask2w128:
+  case X86::BI__builtin_ia32_cvtmask2w256:
+  case X86::BI__builtin_ia32_cvtmask2w512:
+  case X86::BI__builtin_ia32_cvtmask2d128:
+  case X86::BI__builtin_ia32_cvtmask2d256:
+  case X86::BI__builtin_ia32_cvtmask2d512:
+  case X86::BI__builtin_ia32_cvtmask2q128:
+  case X86::BI__builtin_ia32_cvtmask2q256:
+  case X86::BI__builtin_ia32_cvtmask2q512:
+    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
+
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
   case X86::BI__builtin_ia32_storeaps128_mask:
@@ -7922,6 +8020,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // instruction, but it will create a memset that won't be optimized away.
     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
   }
+  case X86::BI__ud2:
+    // llvm.trap makes a ud2a instruction on x86.
+    return EmitTrapCall(Intrinsic::trap);
+  case X86::BI__int2c: {
+    // This syscall signals a driver assertion failure in x86 NT kernels.
+    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
+    llvm::InlineAsm *IA =
+        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
+    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
+        getLLVMContext(), llvm::AttributeList::FunctionIndex,
+        llvm::Attribute::NoReturn);
+    CallSite CS = Builder.CreateCall(IA);
+    CS.setAttributes(NoReturnAttr);
+    return CS.getInstruction();
+  }
+  case X86::BI__readfsbyte:
+  case X86::BI__readfsword:
+  case X86::BI__readfsdword:
+  case X86::BI__readfsqword: {
+    llvm::Type *IntTy = ConvertType(E->getType());
+    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
+                                        llvm::PointerType::get(IntTy, 257));
+    LoadInst *Load = Builder.CreateAlignedLoad(
+        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
+    Load->setVolatile(true);
+    return Load;
+  }
+  case X86::BI__readgsbyte:
+  case X86::BI__readgsword:
+  case X86::BI__readgsdword:
+  case X86::BI__readgsqword: {
+    llvm::Type *IntTy = ConvertType(E->getType());
+    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
+                                        llvm::PointerType::get(IntTy, 256));
+    LoadInst *Load = Builder.CreateAlignedLoad(
+        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
+    Load->setVolatile(true);
+    return Load;
+  }
   }
 }
 
@@ -8326,6 +8463,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
+  case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
+    llvm::SmallVector<llvm::Value *, 5> Args;
+    for (unsigned I = 0; I != 5; ++I)
+      Args.push_back(EmitScalarExpr(E->getArg(I)));
+    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
+                                Args[0]->getType());
+    return Builder.CreateCall(F, Args);
+  }
   case AMDGPU::BI__builtin_amdgcn_div_fixup:
   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
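Aside (not part of the diff): the __readfs*/__readgs* cases above lower to volatile loads through address spaces 257 (fs) and 256 (gs). A hypothetical use on x64 Windows, where by NT convention gs:[0x30] holds the linear address of the current TEB (the offset is an assumption about the platform, not something this commit defines):

    // Declaration as it appears for clang-cl; handled as a builtin.
    extern "C" unsigned long long __readgsqword(unsigned long Offset);

    void *current_teb() {
      return reinterpret_cast<void *>(__readgsqword(0x30));
    }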
@@ -8391,7 +8536,9 @@
   case AMDGPU::BI__builtin_amdgcn_classf:
   case AMDGPU::BI__builtin_amdgcn_classh:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
-
+  case AMDGPU::BI__builtin_amdgcn_fmed3f:
+  case AMDGPU::BI__builtin_amdgcn_fmed3h:
+    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
   case AMDGPU::BI__builtin_amdgcn_read_exec: {
     CallInst *CI = cast<CallInst>(
         EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
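Aside (not part of the diff): the new fmed3 builtins select the median of three floating-point operands. A scalar C++ model, ignoring the NaN handling that the hardware instruction defines:

    #include <algorithm>

    // Median of three: max(min(a, b), min(max(a, b), c)).
    float fmed3_model(float a, float b, float c) {
      return std::max(std::min(a, b), std::min(std::max(a, b), c));
    }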