Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp | 478 |
1 file changed, 312 insertions, 166 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 0f8b1d6584b1..28b234b180fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -51,49 +51,135 @@ using namespace llvm;
 
 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
   : CallLowering(&TLI) {}
 
+static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
+                                             MVT &LocVT) {
+  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
+  // hack because the DAG calls the assignment function with pre-legalized
+  // register typed values, not the raw type.
+  //
+  // This hack is not applied to return values which are not passed on the
+  // stack.
+  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
+    ValVT = LocVT = MVT::i8;
+  else if (OrigVT == MVT::i16)
+    ValVT = LocVT = MVT::i16;
+}
+
+// Account for i1/i8/i16 stack passed value hack
+static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
+  const MVT ValVT = VA.getValVT();
+  return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
+                                                 : LLT(VA.getLocVT());
+}
+
 namespace {
+
+struct AArch64IncomingValueAssigner
+    : public CallLowering::IncomingValueAssigner {
+  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
+                               CCAssignFn *AssignFnVarArg_)
+      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
+
+  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
+                 CCValAssign::LocInfo LocInfo,
+                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
+                 CCState &State) override {
+    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
+    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
+                                            LocInfo, Info, Flags, State);
+  }
+};
+
+struct AArch64OutgoingValueAssigner
+    : public CallLowering::OutgoingValueAssigner {
+  const AArch64Subtarget &Subtarget;
+
+  /// Track if this is used for a return instead of function argument
+  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
+  /// stack passed returns for them and cannot apply the type adjustment.
+  bool IsReturn;
+
+  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
+                               CCAssignFn *AssignFnVarArg_,
+                               const AArch64Subtarget &Subtarget_,
+                               bool IsReturn)
+      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
+        Subtarget(Subtarget_), IsReturn(IsReturn) {}
+
+  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
+                 CCValAssign::LocInfo LocInfo,
+                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
+                 CCState &State) override {
+    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
+    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
+
+    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
+      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
+
+    bool Res;
+    if (Info.IsFixed && !UseVarArgsCCForFixed)
+      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
+    else
+      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
+
+    StackOffset = State.getNextStackOffset();
+    return Res;
+  }
+};
+
 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
-  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                     CCAssignFn *AssignFn)
-      : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
+  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+      : IncomingValueHandler(MIRBuilder, MRI) {}
 
   Register getStackAddress(uint64_t Size, int64_t Offset,
-                           MachinePointerInfo &MPO) override {
+                           MachinePointerInfo &MPO,
+                           ISD::ArgFlagsTy Flags) override {
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
-    int FI = MFI.CreateFixedObject(Size, Offset, true);
+
+    // Byval is assumed to be writable memory, but other stack passed arguments
+    // are not.
+    const bool IsImmutable = !Flags.isByVal();
+
+    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
-    StackUsed = std::max(StackUsed, Size + Offset);
     return AddrReg.getReg(0);
   }
 
+  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
+                             ISD::ArgFlagsTy Flags) const override {
+    // For pointers, we just need to fixup the integer types reported in the
+    // CCValAssign.
+    if (Flags.isPointer())
+      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
+    return getStackValueStoreTypeHack(VA);
+  }
+
   void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     markPhysRegUsed(PhysReg);
-    switch (VA.getLocInfo()) {
-    default:
-      MIRBuilder.buildCopy(ValVReg, PhysReg);
-      break;
-    case CCValAssign::LocInfo::SExt:
-    case CCValAssign::LocInfo::ZExt:
-    case CCValAssign::LocInfo::AExt: {
-      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
-      MIRBuilder.buildTrunc(ValVReg, Copy);
-      break;
-    }
-    }
+    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
   }
 
-  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
+  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
     MachineFunction &MF = MIRBuilder.getMF();
 
-    // The reported memory location may be wider than the value.
-    const LLT RegTy = MRI.getType(ValVReg);
-    MemSize = std::min(static_cast<uint64_t>(RegTy.getSizeInBytes()), MemSize);
+    LLT ValTy(VA.getValVT());
+    LLT LocTy(VA.getLocVT());
+
+    // Fixup the types for the DAG compatibility hack.
+    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
+      std::swap(ValTy, LocTy);
+    else {
+      // The calling code knows if this is a pointer or not, we're only touching
+      // the LocTy for the i8/i16 hack.
+      assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
+      LocTy = MemTy;
+    }
 
     auto MMO = MF.getMachineMemOperand(
-        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemSize,
+        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));
     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
   }
@@ -102,14 +188,11 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
   /// parameters (it's a basic-block live-in), and a call instruction
   /// (it's an implicit-def of the BL).
   virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
-
-  uint64_t StackUsed;
 };
 
 struct FormalArgHandler : public IncomingArgHandler {
-  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                   CCAssignFn *AssignFn)
-      : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
+  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+      : IncomingArgHandler(MIRBuilder, MRI) {}
 
   void markPhysRegUsed(MCRegister PhysReg) override {
     MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -119,8 +202,8 @@ struct FormalArgHandler : public IncomingArgHandler {
 
 struct CallReturnHandler : public IncomingArgHandler {
   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                    MachineInstrBuilder MIB, CCAssignFn *AssignFn)
-      : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+                    MachineInstrBuilder MIB)
+      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
 
   void markPhysRegUsed(MCRegister PhysReg) override {
     MIB.addDef(PhysReg, RegState::Implicit);
@@ -129,22 +212,34 @@ struct CallReturnHandler : public IncomingArgHandler {
   MachineInstrBuilder MIB;
 };
 
+/// A special return arg handler for "returned" attribute arg calls.
+struct ReturnedArgCallReturnHandler : public CallReturnHandler {
+  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
+                               MachineRegisterInfo &MRI,
+                               MachineInstrBuilder MIB)
+      : CallReturnHandler(MIRBuilder, MRI, MIB) {}
+
+  void markPhysRegUsed(MCRegister PhysReg) override {}
+};
+
 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                     MachineInstrBuilder MIB, CCAssignFn *AssignFn,
-                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
+                     MachineInstrBuilder MIB, bool IsTailCall = false,
                      int FPDiff = 0)
-      : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
-        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
-        StackSize(0), SPReg(0) {}
+      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
+        FPDiff(FPDiff),
+        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
 
   Register getStackAddress(uint64_t Size, int64_t Offset,
-                           MachinePointerInfo &MPO) override {
+                           MachinePointerInfo &MPO,
+                           ISD::ArgFlagsTy Flags) override {
     MachineFunction &MF = MIRBuilder.getMF();
     LLT p0 = LLT::pointer(0, 64);
     LLT s64 = LLT::scalar(64);
 
     if (IsTailCall) {
+      assert(!Flags.isByVal() && "byval unhandled with tail calls");
+
       Offset += FPDiff;
       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
@@ -163,6 +258,17 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
     return AddrReg.getReg(0);
   }
 
+  /// We need to fixup the reported store size for certain value types because
+  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
+  /// for compatability with the DAG call lowering implementation, which we're
+  /// currently building on top of.
+  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
+                             ISD::ArgFlagsTy Flags) const override {
+    if (Flags.isPointer())
+      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
+    return getStackValueStoreTypeHack(VA);
+  }
+
   void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     MIB.addUse(PhysReg, RegState::Implicit);
@@ -170,105 +276,60 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
     MIRBuilder.buildCopy(PhysReg, ExtReg);
   }
 
-  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
     MachineFunction &MF = MIRBuilder.getMF();
-    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size,
+    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                        inferAlignFromPtrInfo(MF, MPO));
     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
   }
 
-  void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
-                            uint64_t Size, MachinePointerInfo &MPO,
+  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
+                            Register Addr, LLT MemTy, MachinePointerInfo &MPO,
                             CCValAssign &VA) override {
-    unsigned MaxSize = Size * 8;
+    unsigned MaxSize = MemTy.getSizeInBytes() * 8;
     // For varargs, we always want to extend them to 8 bytes, in which case
     // we disable setting a max.
     if (!Arg.IsFixed)
       MaxSize = 0;
 
-    assert(Arg.Regs.size() == 1);
+    Register ValVReg = Arg.Regs[RegIndex];
+    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
+      MVT LocVT = VA.getLocVT();
+      MVT ValVT = VA.getValVT();
 
-    Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
-                           ? extendRegister(Arg.Regs[0], VA, MaxSize)
-                           : Arg.Regs[0];
+      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
+        std::swap(ValVT, LocVT);
+        MemTy = LLT(VA.getValVT());
+      }
 
-    // If we extended we might need to adjust the MMO's Size.
-    const LLT RegTy = MRI.getType(ValVReg);
-    if (RegTy.getSizeInBytes() > Size)
-      Size = RegTy.getSizeInBytes();
+      ValVReg = extendRegister(ValVReg, VA, MaxSize);
+    } else {
+      // The store does not cover the full allocated stack slot.
+      MemTy = LLT(VA.getValVT());
+    }
 
-    assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
+    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
   }
 
-  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
-                 CCValAssign::LocInfo LocInfo,
-                 const CallLowering::ArgInfo &Info,
-                 ISD::ArgFlagsTy Flags,
-                 CCState &State) override {
-    bool Res;
-    if (Info.IsFixed)
-      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
-    else
-      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
-
-    StackSize = State.getNextStackOffset();
-    return Res;
-  }
-
   MachineInstrBuilder MIB;
-  CCAssignFn *AssignFnVarArg;
+
   bool IsTailCall;
 
   /// For tail calls, the byte offset of the call's argument area from the
   /// callee's. Unused elsewhere.
   int FPDiff;
-  uint64_t StackSize;
 
   // Cache the SP register vreg if we need it more than once in this call site.
   Register SPReg;
+
+  const AArch64Subtarget &Subtarget;
 };
 } // namespace
 
 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
-  return CallConv == CallingConv::Fast && TailCallOpt;
-}
-
-void AArch64CallLowering::splitToValueTypes(
-    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
-    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
-  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
-  LLVMContext &Ctx = OrigArg.Ty->getContext();
-
-  SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-
-  if (SplitVTs.size() == 0)
-    return;
-
-  if (SplitVTs.size() == 1) {
-    // No splitting to do, but we want to replace the original type (e.g. [1 x
-    // double] -> double).
-    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
-                           OrigArg.Flags[0], OrigArg.IsFixed);
-    return;
-  }
-
-  // Create one ArgInfo for each virtual register in the original ArgInfo.
-  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
-
-  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
-      OrigArg.Ty, CallConv, false);
-  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
-    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
-    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
-                           OrigArg.IsFixed);
-    if (NeedsRegBlock)
-      SplitArgs.back().Flags[0].setInConsecutiveRegs();
-  }
-
-  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+  return (CallConv == CallingConv::Fast && TailCallOpt) ||
+         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
 }
 
 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -284,6 +345,7 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
   if (!VRegs.empty()) {
     MachineFunction &MF = MIRBuilder.getMF();
     const Function &F = MF.getFunction();
+    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     MachineRegisterInfo &MRI = MF.getRegInfo();
     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
@@ -300,20 +362,16 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
     CallingConv::ID CC = F.getCallingConv();
 
     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
-        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
-        return false;
-      }
-
       Register CurVReg = VRegs[i];
-      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
+      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 
       // i1 is a special case because SDAG i1 true is naturally zero extended
       // when widened using ANYEXT. We need to do it explicitly here.
       if (MRI.getType(CurVReg).getSizeInBits() == 1) {
         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
-      } else {
+      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
+                 1) {
         // Some types will need extending as specified by the CC.
         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
         if (EVT(NewVT) != SplitEVTs[i]) {
@@ -357,13 +415,18 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
               .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
               .getReg(0);
         } else {
-          LLVM_DEBUG(dbgs() << "Could not handle ret ty");
+          LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
           return false;
         }
       } else {
-        // A scalar extend.
-        CurVReg =
-            MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
+        // If the split EVT was a <1 x T> vector, and NewVT is T, then we
+        // don't have to do anything since we don't distinguish between the
+        // two.
+        if (NewLLT != MRI.getType(CurVReg)) {
+          // A scalar extend.
+          CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
+                        .getReg(0);
+        }
       }
     }
   }
@@ -372,11 +435,14 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
         // Reset the arg flags after modifying CurVReg.
         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
       }
-      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
+      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
     }
 
-    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
-    Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
+    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
+                                          /*IsReturn*/ true);
+    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
+    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
+                                            MIRBuilder, CC, F.isVarArg());
   }
 
   if (SwiftErrorVReg) {
@@ -431,12 +497,20 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
   }
 }
 
-bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const {
+bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
+  auto &F = MF.getFunction();
   if (isa<ScalableVectorType>(F.getReturnType()))
     return true;
-  return llvm::any_of(F.args(), [](const Argument &A) {
-    return isa<ScalableVectorType>(A.getType());
-  });
+  if (llvm::any_of(F.args(), [](const Argument &A) {
+        return isa<ScalableVectorType>(A.getType());
+      }))
+    return true;
+  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
+    LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
+    return true;
+  }
+  return false;
 }
 
 bool AArch64CallLowering::lowerFormalArguments(
@@ -453,10 +527,13 @@ bool AArch64CallLowering::lowerFormalArguments(
     if (DL.getTypeStoreSize(Arg.getType()).isZero())
       continue;
 
-    ArgInfo OrigArg{VRegs[i], Arg.getType()};
+    ArgInfo OrigArg{VRegs[i], Arg, i};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
 
+    if (Arg.hasAttribute(Attribute::SwiftAsync))
+      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
+
+    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
     ++i;
   }
 
@@ -467,12 +544,14 @@ bool AArch64CallLowering::lowerFormalArguments(
   CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
 
-  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
-  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
+  FormalArgHandler Handler(MIRBuilder, MRI);
+  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
+                                     F.getCallingConv(), F.isVarArg()))
    return false;
 
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
-  uint64_t StackOffset = Handler.StackUsed;
+  uint64_t StackOffset = Assigner.StackOffset;
   if (F.isVarArg()) {
     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
     if (!Subtarget.isTargetDarwin()) {
@@ -482,7 +561,8 @@ bool AArch64CallLowering::lowerFormalArguments(
     }
 
     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
-    StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
+    StackOffset =
+        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);
 
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
@@ -521,8 +601,9 @@ bool AArch64CallLowering::lowerFormalArguments(
 }
 
 /// Return true if the calling convention is one that we can guarantee TCO for.
-static bool canGuaranteeTCO(CallingConv::ID CC) {
-  return CC == CallingConv::Fast;
+static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
+  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
+         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
 }
 
 /// Return true if we might ever do TCO for calls with this calling convention.
@@ -531,9 +612,12 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   case CallingConv::C:
   case CallingConv::PreserveMost:
   case CallingConv::Swift:
+  case CallingConv::SwiftTail:
+  case CallingConv::Tail:
+  case CallingConv::Fast:
     return true;
   default:
-    return canGuaranteeTCO(CC);
+    return false;
   }
 }
 
@@ -567,9 +651,12 @@ bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);
 
-  if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
-                         *CalleeAssignFnVarArg, *CallerAssignFnFixed,
-                         *CallerAssignFnVarArg))
+  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
+                                              CalleeAssignFnVarArg);
+  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
+                                              CallerAssignFnVarArg);
+
+  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
     return false;
 
   // Make sure that the caller and callee preserve all of the same registers.
@@ -592,9 +679,11 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
     return true;
 
   const Function &CallerF = MF.getFunction();
+  LLVMContext &Ctx = CallerF.getContext();
   CallingConv::ID CalleeCC = Info.CallConv;
   CallingConv::ID CallerCC = CallerF.getCallingConv();
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 
   CCAssignFn *AssignFnFixed;
   CCAssignFn *AssignFnVarArg;
@@ -602,9 +691,11 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
 
   // We have outgoing arguments. Make sure that we can tail call with them.
   SmallVector<CCValAssign, 16> OutLocs;
-  CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
+  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
 
-  if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
+  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
+                                              Subtarget, /*IsReturn*/ false);
+  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
     return false;
   }
@@ -718,8 +809,8 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
   }
 
   // If we have -tailcallopt, then we're done.
-  if (MF.getTarget().Options.GuaranteedTailCallOpt)
-    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
+  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
+    return CalleeCC == CallerF.getCallingConv();
 
   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
   // Try to find cases where we can do that.
@@ -762,6 +853,24 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
   return AArch64::TCRETURNri;
 }
 
+static const uint32_t *
+getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
+               AArch64CallLowering::CallLoweringInfo &Info,
+               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
+  const uint32_t *Mask;
+  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
+    // For 'this' returns, use the X0-preserving mask if applicable
+    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
+    if (!Mask) {
+      OutArgs[0].Flags[0].setReturned(false);
+      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
+    }
+  } else {
+    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
+  }
+  return Mask;
+}
+
 bool AArch64CallLowering::lowerTailCall(
     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
     SmallVectorImpl<ArgInfo> &OutArgs) const {
@@ -772,7 +881,9 @@ bool AArch64CallLowering::lowerTailCall(
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 
   // True when we're tail calling, but without -tailcallopt.
-  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
+  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
+                   Info.CallConv != CallingConv::Tail &&
+                   Info.CallConv != CallingConv::SwiftTail;
 
   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
   // register class. Until we can do that, we should fall back here.
@@ -801,9 +912,10 @@ bool AArch64CallLowering::lowerTailCall(
   MIB.addImm(0);
 
   // Tell the call which registers are clobbered.
-  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  auto TRI = Subtarget.getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
-  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
+  if (Subtarget.hasCustomCallingConv())
     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
   MIB.addRegMask(Mask);
 
@@ -828,7 +940,11 @@ bool AArch64CallLowering::lowerTailCall(
     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
     SmallVector<CCValAssign, 16> OutLocs;
     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
-    analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);
+
+    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
+                                                Subtarget, /*IsReturn*/ false);
+    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
+      return false;
 
     // The callee will pop the argument stack as a tail call. Thus, we must
     // keep it 16-byte aligned.
@@ -839,6 +955,11 @@ bool AArch64CallLowering::lowerTailCall(
     // actually shrink the stack.
     FPDiff = NumReusableBytes - NumBytes;
 
+    // Update the required reserved area if this is the tail call requiring the
+    // most argument stack space.
+    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
+      FuncInfo->setTailCallReservedStack(-FPDiff);
+
     // The stack pointer must be 16-byte aligned at all times it's used for a
     // memory operation, which in practice means at *all* times and in
     // particular across call boundaries. Therefore our own arguments started at
@@ -849,12 +970,18 @@ bool AArch64CallLowering::lowerTailCall(
 
   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
 
+  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
+                                        Subtarget, /*IsReturn*/ false);
+
   // Do the actual argument marshalling.
-  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
-                             AssignFnVarArg, true, FPDiff);
-  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
+  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
+                             /*IsTailCall*/ true, FPDiff);
+  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
+                                     CalleeCC, Info.IsVarArg))
     return false;
 
+  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
+
   if (Info.IsVarArg && Info.IsMustTailCall) {
     // Now we know what's being passed to the function. Add uses to the call for
     // the forwarded registers that we *aren't* passing as parameters. This will
@@ -880,12 +1007,12 @@ bool AArch64CallLowering::lowerTailCall(
   // sequence start and end here.
   if (!IsSibCall) {
     MIB->getOperand(1).setImm(FPDiff);
-    CallSeqStart.addImm(NumBytes).addImm(0);
+    CallSeqStart.addImm(0).addImm(0);
 
     // End the call sequence *before* emitting the call. Normally, we would
     // tidy the frame up after the call. However, here, we've laid out the
     // parameters so that when SP is reset, they will be in the correct
     // location.
-    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
   }
 
   // Now we can add the actual call instruction to the correct basic block.
@@ -913,7 +1040,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> OutArgs;
   for (auto &OrigArg : Info.OrigArgs) {
-    splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
+    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
     if (OrigArg.Ty->isIntegerTy(1))
       OutArgs.back().Flags[0].setZExt();
@@ -921,7 +1048,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> InArgs;
   if (!Info.OrigRet.Ty->isVoidTy())
-    splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
+    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
 
   // If we can lower as a tail call, do that instead.
   bool CanTailCallOpt =
@@ -956,8 +1083,20 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   MIB.add(Info.Callee);
 
   // Tell the call which registers are clobbered.
-  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
+  const uint32_t *Mask;
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  const auto *TRI = Subtarget.getRegisterInfo();
+
+  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
+                                        Subtarget, /*IsReturn*/ false);
+  // Do the actual argument marshalling.
+  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false);
+  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
+                                     Info.CallConv, Info.IsVarArg))
+    return false;
+
+  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
+
   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
   MIB.addRegMask(Mask);
@@ -965,12 +1104,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   if (TRI->isAnyArgRegReserved(MF))
     TRI->emitReservedArgRegCallError(MF);
 
-  // Do the actual argument marshalling.
-  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
-                             AssignFnVarArg, false);
-  if (!handleAssignments(MIRBuilder, OutArgs, Handler))
-    return false;
-
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);
 
@@ -978,17 +1111,26 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // instruction, it must have a register class matching the
   // constraint of that instruction.
   if (Info.Callee.isReg())
-    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
-                             *MF.getSubtarget().getRegBankInfo(), *MIB,
-                             MIB->getDesc(), Info.Callee, 0);
+    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
+                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
+                             Info.Callee, 0);
 
   // Finally we can copy the returned value back into its virtual-register. In
   // symmetry with the arguments, the physical register must be an
   // implicit-define of the call instruction.
   if (!Info.OrigRet.Ty->isVoidTy()) {
     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
-    CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
-    if (!handleAssignments(MIRBuilder, InArgs, Handler))
+    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
+    bool UsingReturnedArg =
+        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
+
+    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
+                                          /*IsReturn*/ false);
+    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
+    if (!determineAndHandleAssignments(
+            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
+            MIRBuilder, Info.CallConv, Info.IsVarArg,
+            UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
       return false;
   }
 
@@ -1000,13 +1142,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
-          ? alignTo(Handler.StackSize, 16)
+          ? alignTo(Assigner.StackOffset, 16)
          : 0;
 
-  CallSeqStart.addImm(Handler.StackSize).addImm(0);
+  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
-      .addImm(Handler.StackSize)
+      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);
 
   return true;
 }
+
+bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
+  return Ty.getSizeInBits() == 64;
+}
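As context for the recurring i1/i8/i16 "DAG compatibility hack" in this patch: the adjustment itself is a pure type rewrite, applied to fixed stack-passed arguments and skipped for varargs and return values. The standalone C++ sketch below is illustrative only — SimpleVT is a hypothetical stand-in for LLVM's MVT, and the Win64 use-varargs-CC-for-fixed-arguments case from AArch64OutgoingValueAssigner::assignArg is omitted. It mirrors what applyStackPassedSmallTypeDAGHack does and when the outgoing assigner skips it:

#include <cassert>

// Hypothetical stand-in for LLVM's MVT, just enough to run the logic below.
enum class SimpleVT { i1, i8, i16, i32 };

// Mirrors applyStackPassedSmallTypeDAGHack: i1 and i8 pin both ValVT and
// LocVT to i8, i16 pins both to i16, and wider types are left untouched.
// The raw (Orig) type decides, because the DAG hands the assigner
// pre-legalized register types rather than the original value type.
static void applySmallTypeHack(SimpleVT OrigVT, SimpleVT &ValVT,
                               SimpleVT &LocVT) {
  if (OrigVT == SimpleVT::i1 || OrigVT == SimpleVT::i8)
    ValVT = LocVT = SimpleVT::i8;
  else if (OrigVT == SimpleVT::i16)
    ValVT = LocVT = SimpleVT::i16;
}

// Mirrors the guard in AArch64OutgoingValueAssigner::assignArg (minus the
// Win64 detail): the hack applies to fixed outgoing arguments only, never to
// varargs or returns — i8/i16 returns are not passed on the stack, so there
// is no slot type to fix up.
static void adjustOutgoingTypes(SimpleVT OrigVT, SimpleVT &ValVT,
                                SimpleVT &LocVT, bool IsVarArg,
                                bool IsReturn) {
  if (!IsVarArg && !IsReturn)
    applySmallTypeHack(OrigVT, ValVT, LocVT);
}

int main() {
  // An i1 argument arrives pre-legalized (say, widened to i32); the hack
  // forces a 1-byte stack slot type, matching the SelectionDAG lowering.
  SimpleVT ValVT = SimpleVT::i32, LocVT = SimpleVT::i32;
  adjustOutgoingTypes(SimpleVT::i1, ValVT, LocVT, /*IsVarArg=*/false,
                      /*IsReturn=*/false);
  assert(ValVT == SimpleVT::i8 && LocVT == SimpleVT::i8);

  // Return values are exempt, so the reported types stay as-is.
  ValVT = LocVT = SimpleVT::i32;
  adjustOutgoingTypes(SimpleVT::i1, ValVT, LocVT, /*IsVarArg=*/false,
                      /*IsReturn=*/true);
  assert(ValVT == SimpleVT::i32 && LocVT == SimpleVT::i32);
  return 0;
}

The IsReturn exemption in the sketch corresponds to the IsReturn flag threaded through AArch64OutgoingValueAssigner in the patch, which is why lowerReturn constructs the assigner with /*IsReturn*/ true while argument and call lowering pass false.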