path: root/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  1677
1 file changed, 1014 insertions, 663 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 7733fe7f7b24..fc5ef02e8457 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -33,14 +34,18 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
+using namespace MIPatternMatch;
namespace {
@@ -98,15 +103,23 @@ private:
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
- int64_t CmpConstant,
- const CmpInst::Predicate &Pred,
+ ///@{
+ /// Helper functions for selectCompareBranch.
+ bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
+ MachineIRBuilder &MIB) const;
+ bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
+ ///@}
+
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
@@ -147,6 +160,7 @@ private:
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -159,20 +173,72 @@ private:
MachineIRBuilder &MIRBuilder) const;
// Emit an integer compare between LHS and RHS, which checks for Predicate.
- //
- // This returns the produced compare instruction, and the predicate which
- // was ultimately used in the compare. The predicate may differ from what
- // is passed in \p Predicate due to optimization.
- std::pair<MachineInstr *, CmpInst::Predicate>
- emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
+ MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit a floating point comparison between \p LHS and \p RHS.
+ /// \p Pred if given is the intended predicate to use.
+ MachineInstr *emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> = None) const;
+
+ MachineInstr *emitInstr(unsigned Opcode,
+ std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps,
+ MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns = None) const;
+ /// Helper function to emit an add or sub instruction.
+ ///
+ /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
+ /// in a specific order.
+ ///
+ /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
+ ///
+ /// \code
+ /// const std::array<std::array<unsigned, 2>, 4> Table {
+ /// {{AArch64::ADDXri, AArch64::ADDWri},
+ /// {AArch64::ADDXrs, AArch64::ADDWrs},
+ /// {AArch64::ADDXrr, AArch64::ADDWrr},
+ /// {AArch64::SUBXri, AArch64::SUBWri},
+ /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ /// \endcode
+ ///
+ /// Each row in the table corresponds to a different addressing mode. Each
+ /// column corresponds to a different register size.
+ ///
+ /// \attention Rows must be structured as follows:
+ /// - Row 0: The ri opcode variants
+ /// - Row 1: The rs opcode variants
+ /// - Row 2: The rr opcode variants
+ /// - Row 3: The ri opcode variants for negative immediates
+ /// - Row 4: The rx opcode variants
+ ///
+ /// \attention Columns must be structured as follows:
+ /// - Column 0: The 64-bit opcode variants
+ /// - Column 1: The 32-bit opcode variants
+ ///
+ /// \p Dst is the destination register of the binop to emit.
+ /// \p LHS is the left-hand operand of the binop to emit.
+ /// \p RHS is the right-hand operand of the binop to emit.
+ MachineInstr *emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
+ MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitTST(const Register &LHS, const Register &RHS,
+ MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
+ AArch64CC::CondCode CC,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
@@ -184,9 +250,24 @@ private:
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const;
- /// Emit a CSet for a compare.
+ /// Emit a CSet for an integer compare.
+ ///
+ /// \p DefReg is expected to be a 32-bit scalar register.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
+ /// Emit a CSet for a FP compare.
+ ///
+ /// \p Dst is expected to be a 32-bit scalar register.
+ MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit the overflow op for \p Opcode.
+ ///
+ /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
+ /// G_USUBO, etc.
+ std::pair<MachineInstr *, AArch64CC::CondCode>
+ emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
@@ -195,6 +276,11 @@ private:
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
+ /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -316,13 +402,6 @@ private:
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
- MachineOperand &RHS,
- CmpInst::Predicate &Predicate,
- MachineIRBuilder &MIB) const;
- MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIB) const;
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
@@ -498,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
- Immed = ValAndVReg->Value;
+ Immed = ValAndVReg->Value.getSExtValue();
} else
return None;
return Immed;
@@ -786,6 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
+ (void)KnownValid;
#endif
return ValidCopy;
};
@@ -932,44 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
-static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI) {
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
- AArch64::GPRRegBankID);
- LLT Ty = MRI.getType(I.getOperand(0).getReg());
- if (Ty == LLT::scalar(32))
- return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
- else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
- return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
- return 0;
-}
+MachineInstr *
+AArch64InstructionSelector::emitSelect(Register Dst, Register True,
+ Register False, AArch64CC::CondCode CC,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
+ RBI.getRegBank(True, MRI, TRI)->getID() &&
+ "Expected both select operands to have the same regbank?");
+ LLT Ty = MRI.getType(True);
+ if (Ty.isVector())
+ return nullptr;
+ const unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) &&
+ "Expected 32 bit or 64 bit select only?");
+ const bool Is32Bit = Size == 32;
+ if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
+ unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
+ auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
+ return &*FCSel;
+ }
+
+ // By default, we'll try and emit a CSEL.
+ unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
+ bool Optimized = false;
+ auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
+ &Optimized](Register &Reg, Register &OtherReg,
+ bool Invert) {
+ if (Optimized)
+ return false;
-/// Helper function to select the opcode for a G_FCMP.
-static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
- // If this is a compare against +0.0, then we don't have to explicitly
- // materialize a constant.
- const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
- bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
- unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
- if (OpSize != 32 && OpSize != 64)
- return 0;
- unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
- {AArch64::FCMPSri, AArch64::FCMPDri}};
- return CmpOpcTbl[ShouldUseImm][OpSize == 64];
-}
+ // Attempt to fold:
+ //
+ // %sub = G_SUB 0, %x
+ // %select = G_SELECT cc, %reg, %sub
+ //
+ // Into:
+ // %select = CSNEG %reg, %x, cc
+ Register MatchReg;
+ if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %xor = G_XOR %x, -1
+ // %select = G_SELECT cc, %reg, %xor
+ //
+ // Into:
+ // %select = CSINV %reg, %x, cc
+ if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %add = G_ADD %x, 1
+ // %select = G_SELECT cc, %reg, %add
+ //
+ // Into:
+ // %select = CSINC %reg, %x, cc
+ if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
-/// Returns true if \p P is an unsigned integer comparison predicate.
-static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
- switch (P) {
- default:
return false;
- case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_UGE:
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_ULE:
- return true;
- }
+ };
+
+ // Helper lambda which tries to use CSINC/CSINV for the instruction when its
+ // true/false values are constants.
+ // FIXME: All of these patterns already exist in tablegen. We should be
+ // able to import these.
+ auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
+ &Optimized]() {
+ if (Optimized)
+ return false;
+ auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ if (!TrueCst && !FalseCst)
+ return false;
+
+ Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+ if (TrueCst && FalseCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ int64_t F = FalseCst->Value.getSExtValue();
+
+ if (T == 0 && F == 1) {
+ // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+
+ if (T == 0 && F == -1) {
+      // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+ }
+
+ if (TrueCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ if (T == 1) {
+ // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+
+ if (T == -1) {
+ // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+ }
+
+ if (FalseCst) {
+ int64_t F = FalseCst->Value.getSExtValue();
+ if (F == 1) {
+ // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ False = ZReg;
+ return true;
+ }
+
+ if (F == -1) {
+      // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ False = ZReg;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
+ Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
+ Optimized |= TryOptSelectCst();
+ auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
+ return &*SelectInst;
}
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
@@ -1099,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
}
if (VRegAndVal)
- C = VRegAndVal->Value;
+ C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1109,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
auto VRegAndVal =
getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
- C = VRegAndVal->Value;
+ C = VRegAndVal->Value.getSExtValue();
break;
}
}
@@ -1211,8 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
}
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
- MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
- MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
+ MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const {
+ assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
// Given something like this:
//
// %x = ...Something...
@@ -1230,65 +1440,96 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
//
// TBNZ %x %bb.3
//
- if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
- return false;
-
- // Need to be comparing against 0 to fold.
- if (CmpConstant != 0)
- return false;
-
- MachineRegisterInfo &MRI = *MIB.getMRI();
-
- // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
- // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
- // so folding would be redundant.
- if (Pred != CmpInst::Predicate::ICMP_EQ &&
- Pred != CmpInst::Predicate::ICMP_NE)
- return false;
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit =
- getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
- if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
+ auto MaybeBit = getConstantVRegValWithLookThrough(
+ AndInst.getOperand(2).getReg(), *MIB.getMRI());
+ if (!MaybeBit)
+ return false;
+
+ int32_t Bit = MaybeBit->Value.exactLogBase2();
+ if (Bit < 0)
return false;
- uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
- Register TestReg = AndInst->getOperand(1).getReg();
- bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
+ Register TestReg = AndInst.getOperand(1).getReg();
// Emit a TB(N)Z.
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
return true;
}
-bool AArch64InstructionSelector::selectCompareBranch(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+ bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const {
+ assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+ AArch64::GPRRegBankID &&
+ "Expected GPRs only?");
+ auto Ty = MRI.getType(CompareReg);
+ unsigned Width = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected scalar only?");
+ assert(Width <= 64 && "Expected width to be at most 64?");
+ static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+ {AArch64::CBNZW, AArch64::CBNZX}};
+ unsigned Opc = OpcTable[IsNegative][Width == 64];
+ auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return &*BranchMI;
+}
- const Register CondReg = I.getOperand(0).getReg();
+bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
+ MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
+ assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
+ // totally clean. Some of them require two branches to implement.
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
+ emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
+ Pred);
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- MachineInstr *CCMI = MRI.getVRegDef(CondReg);
- if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
- CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
- if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
+ if (CC2 != AArch64CC::AL)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
+ //
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (!ProduceNonFlagSettingCondBr)
return false;
- Register LHS = CCMI->getOperand(2).getReg();
- Register RHS = CCMI->getOperand(3).getReg();
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto Pred =
+ static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
+ Register LHS = ICmp.getOperand(2).getReg();
+ Register RHS = ICmp.getOperand(3).getReg();
+
+ // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- MachineIRBuilder MIB(I);
- CmpInst::Predicate Pred =
- (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
- MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+ MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
//
// Handle non-commutative condition codes first.
// Note that we don't want to do this when we have a G_AND because it can
// become a tst. The tst will make the test bit in the TB(N)Z redundant.
- if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
- int64_t C = VRegAndVal->Value;
+ if (VRegAndVal && !AndInst) {
+ int64_t C = VRegAndVal->Value.getSExtValue();
// When we have a greater-than comparison, we can just test if the msb is
// zero.
@@ -1309,54 +1550,97 @@ bool AArch64InstructionSelector::selectCompareBranch(
}
}
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- LHSMI = getDefIgnoringCopies(LHS, MRI);
+ // Attempt to handle commutative condition codes. Right now, that's only
+ // eq/ne.
+ if (ICmpInst::isEquality(Pred)) {
+ if (!VRegAndVal) {
+ std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
+ }
+
+ if (VRegAndVal && VRegAndVal->Value == 0) {
+ // If there's a G_AND feeding into this branch, try to fold it away by
+ // emitting a TB(N)Z instead.
+ //
+ // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
+ // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
+ // would be redundant.
+ if (AndInst &&
+ tryOptAndIntoCompareBranch(
+ *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, try to emit a CB(N)Z instead.
+ auto LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+ emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
}
- if (!VRegAndVal || VRegAndVal->Value != 0) {
- // If we can't select a CBZ then emit a cmp + Bcc.
- MachineInstr *Cmp;
- std::tie(Cmp, Pred) = emitIntegerCompare(
- CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
- if (!Cmp)
- return false;
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
+ return false;
+}
+
+bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
return true;
+
+ // Couldn't optimize. Emit a compare + a Bcc.
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto PredOp = ICmp.getOperand(1);
+ emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+ static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ Register CondReg = I.getOperand(0).getReg();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
+ CondReg = CCMI->getOperand(1).getReg();
+ CCMI = MRI.getVRegDef(CondReg);
}
- // Try to emit a TB(N)Z for an eq or ne condition.
- if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
- MIB)) {
+ // Try to select the G_BRCOND using whatever is feeding the condition if
+ // possible.
+ MachineIRBuilder MIB(I);
+ unsigned CCMIOpc = CCMI->getOpcode();
+ if (CCMIOpc == TargetOpcode::G_FCMP)
+ return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
+ if (CCMIOpc == TargetOpcode::G_ICMP)
+ return selectCompareBranchFedByICmp(I, *CCMI, MIB);
+
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (ProduceNonFlagSettingCondBr) {
+ emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
+ I.getOperand(1).getMBB(), MIB);
I.eraseFromParent();
return true;
}
- const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
- if (RB.getID() != AArch64::GPRRegBankID)
- return false;
- if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
- return false;
-
- const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
- unsigned CBOpc = 0;
- if (CmpWidth <= 32)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
- else if (CmpWidth == 64)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
- else
- return false;
-
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
- .addUse(LHS)
- .addMBB(DestMBB)
- .constrainAllUses(TII, TRI, RBI);
-
+ // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
+ auto TstMI =
+ MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ auto Bcc = MIB.buildInstr(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addMBB(I.getOperand(1).getMBB());
I.eraseFromParent();
- return true;
+ return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
/// Returns the element immediate value of a vector shift operand if found.
@@ -1377,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
return None;
if (Idx == 1)
- ImmVal = VRegAndVal->Value;
- if (ImmVal != VRegAndVal->Value)
+ ImmVal = VRegAndVal->Value.getSExtValue();
+ if (ImmVal != VRegAndVal->Value.getSExtValue())
return None;
}
@@ -1441,6 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL(
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
@@ -1457,9 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL(
return true;
}
-bool AArch64InstructionSelector::selectVectorASHR(
+bool AArch64InstructionSelector::selectVectorAshrLshr(
MachineInstr &I, MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_ASHR);
+ assert(I.getOpcode() == TargetOpcode::G_ASHR ||
+ I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
@@ -1468,25 +1761,40 @@ bool AArch64InstructionSelector::selectVectorASHR(
if (!Ty.isVector())
return false;
+ bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
+
+ // We expect the immediate case to be lowered in the PostLegalCombiner to
+ // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
+
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
- const TargetRegisterClass *RC = nullptr;
+ const TargetRegisterClass *RC =
+ getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
if (Ty == LLT::vector(2, 64)) {
- Opc = AArch64::SSHLv2i64;
+ Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
- RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(4, 32)) {
- Opc = AArch64::SSHLv4i32;
+ Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
- RC = &AArch64::FPR128RegClass;
} else if (Ty == LLT::vector(2, 32)) {
- Opc = AArch64::SSHLv2i32;
+ Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
- RC = &AArch64::FPR64RegClass;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
+ NegOpc = AArch64::NEGv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
+ NegOpc = AArch64::NEGv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
+ NegOpc = AArch64::NEGv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
+ NegOpc = AArch64::NEGv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
@@ -1569,7 +1877,6 @@ void AArch64InstructionSelector::materializeLargeCMVal(
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
- return;
}
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
@@ -1624,6 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MRI.setType(DstReg, LLT::scalar(64));
return true;
}
+ case AArch64::G_DUP: {
+ // Convert the type from p0 to s64 to help selection.
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (!DstTy.getElementType().isPointer())
+ return false;
+ MachineIRBuilder MIB(I);
+ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
+ MRI.setType(I.getOperand(0).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(1).setReg(NewSrc.getReg(0));
+ return true;
+ }
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_SITOFP: {
+ // If both source and destination regbanks are FPR, then convert the opcode
+ // to G_SITOF so that the importer can select it to an fpr variant.
+ // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
+ // copy.
+ Register SrcReg = I.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ return false;
+
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
+ if (I.getOpcode() == TargetOpcode::G_SITOFP)
+ I.setDesc(TII.get(AArch64::G_SITOF));
+ else
+ I.setDesc(TII.get(AArch64::G_UITOF));
+ return true;
+ }
+ return false;
+ }
default:
return false;
}
@@ -1664,6 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
return false;
}
+
+ // Also take the opportunity here to try to do some optimization.
+ // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
+ Register NegatedReg;
+ if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
+ return true;
+ I.getOperand(2).setReg(NegatedReg);
+ I.setDesc(TII.get(TargetOpcode::G_SUB));
return true;
}
@@ -1753,6 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
+ case TargetOpcode::G_BR: {
+ // If the branch jumps to the fallthrough block, don't bother emitting it.
+ // Only do this for -O0 for a good code size improvement, because when
+ // optimizations are enabled we want to leave this choice to
+ // MachineBlockPlacement.
+ bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
+ if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -1872,48 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineIRBuilder MIB(I);
switch (Opcode) {
- case TargetOpcode::G_BRCOND: {
- if (Ty.getSizeInBits() > 32) {
- // We shouldn't need this on AArch64, but it would be implemented as an
- // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
- // bit being tested is < 32.
- LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
- << ", expected at most 32-bits");
- return false;
- }
-
- const Register CondReg = I.getOperand(0).getReg();
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
-
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
- return true;
-
- if (ProduceNonFlagSettingCondBr) {
- auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
- .addUse(CondReg)
- .addImm(/*bit offset=*/0)
- .addMBB(DestMBB);
-
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
- } else {
- auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
- .addDef(AArch64::WZR)
- .addUse(CondReg)
- .addImm(1);
- constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
- auto Bcc =
- BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
- .addImm(AArch64CC::EQ)
- .addMBB(DestMBB);
-
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
- }
- }
+ case TargetOpcode::G_BRCOND:
+ return selectCompareBranch(I, MF, MRI);
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
@@ -1993,6 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT s128 = LLT::scalar(128);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
@@ -2002,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64) {
+ if (Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s32 << " or " << s64
- << '\n');
+ << " or " << s128 << '\n');
return false;
}
@@ -2018,7 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// The case when we have 0.0 is covered by tablegen. Reject it here so we
// can be sure tablegen works correctly and isn't rescued by this code.
- if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
+ // 0.0 is not covered by tablegen for FP128. So we will handle this
+ // scenario in the code here.
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
} else {
// s32 and s64 are covered by tablegen.
@@ -2045,15 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Either emit a FMOV, or emit a copy to emit a normal mov.
const TargetRegisterClass &GPRRC =
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
+ const TargetRegisterClass &FPRRC =
+ DefSize == 32 ? AArch64::FPR32RegClass
+ : (DefSize == 64 ? AArch64::FPR64RegClass
+ : AArch64::FPR128RegClass);
// Can we use a FMOV instruction to represent the immediate?
if (emitFMovForFConstant(I, MRI))
return true;
// For 64b values, emit a constant pool load instead.
- if (DefSize == 64) {
+ if (DefSize == 64 || DefSize == 128) {
auto *FPImm = I.getOperand(1).getFPImm();
MachineIRBuilder MIB(I);
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
@@ -2246,21 +2571,22 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto &MemOp = **I.memoperands_begin();
+ uint64_t MemSizeInBytes = MemOp.getSize();
if (MemOp.isAtomic()) {
// For now we just support s8 acquire loads to be able to compile stack
// protector code.
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
- MemOp.getSize() == 1) {
+ MemSizeInBytes == 1) {
I.setDesc(TII.get(AArch64::LDARB));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
- unsigned MemSizeInBits = MemOp.getSize() * 8;
+ unsigned MemSizeInBits = MemSizeInBytes * 8;
- const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
+ const Register PtrReg = I.getOperand(1).getReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
@@ -2272,68 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
- const unsigned NewOpc =
- selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
- if (NewOpc == I.getOpcode())
- return false;
-
- I.setDesc(TII.get(NewOpc));
-
- uint64_t Offset = 0;
- auto *PtrMI = MRI.getVRegDef(PtrReg);
-
- // Try to fold a GEP into our unsigned immediate addressing mode.
- if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
- if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
- int64_t Imm = *COff;
- const unsigned Size = MemSizeInBits / 8;
- const unsigned Scale = Log2_32(Size);
- if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
- Register Ptr2Reg = PtrMI->getOperand(1).getReg();
- I.getOperand(1).setReg(Ptr2Reg);
- PtrMI = MRI.getVRegDef(Ptr2Reg);
- Offset = Imm / Size;
- }
+ // Helper lambda for partially selecting I. Either returns the original
+ // instruction with an updated opcode, or a new instruction.
+ auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+ bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ const unsigned NewOpc =
+ selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+ if (NewOpc == I.getOpcode())
+ return nullptr;
+ // Check if we can fold anything into the addressing mode.
+ auto AddrModeFns =
+ selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+ if (!AddrModeFns) {
+ // Can't fold anything. Use the original instruction.
+ I.setDesc(TII.get(NewOpc));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return &I;
}
- }
- // If we haven't folded anything into our addressing mode yet, try to fold
- // a frame index into the base+offset.
- if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
- I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+ // Folded something. Create a new instruction and return it.
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+ IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ NewInst.cloneMemRefs(I);
+ for (auto &Fn : *AddrModeFns)
+ Fn(NewInst);
+ I.eraseFromParent();
+ return &*NewInst;
+ };
- I.addOperand(MachineOperand::CreateImm(Offset));
+ MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+ if (!LoadStore)
+ return false;
// If we're storing a 0, use WZR/XZR.
- if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
- if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
- if (I.getOpcode() == AArch64::STRWui)
- I.getOperand(0).setReg(AArch64::WZR);
- else if (I.getOpcode() == AArch64::STRXui)
- I.getOperand(0).setReg(AArch64::XZR);
+ if (Opcode == TargetOpcode::G_STORE) {
+ auto CVal = getConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
+ /*HandleFConstants = */ false);
+ if (CVal && CVal->Value == 0) {
+ switch (LoadStore->getOpcode()) {
+ case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
+ LoadStore->getOperand(0).setReg(AArch64::WZR);
+ break;
+ case AArch64::STRXui:
+ LoadStore->getOperand(0).setReg(AArch64::XZR);
+ break;
+ }
}
}
if (IsZExtLoad) {
- // The zextload from a smaller type to i32 should be handled by the importer.
- if (MRI.getType(ValReg).getSizeInBits() != 64)
+ // The zextload from a smaller type to i32 should be handled by the
+ // importer.
+ if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
// If we have a ZEXTLOAD then change the load's type to be a narrower reg
- //and zero_extend with SUBREG_TO_REG.
+ // and zero_extend with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- Register DstReg = I.getOperand(0).getReg();
- I.getOperand(0).setReg(LdReg);
+ Register DstReg = LoadStore->getOperand(0).getReg();
+ LoadStore->getOperand(0).setReg(LdReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
@@ -2364,22 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
-
+ case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
- return selectVectorASHR(I, MRI);
+ return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
- case TargetOpcode::G_OR:
- case TargetOpcode::G_LSHR: {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -2408,37 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_UADDO: {
- // TODO: Support other types.
- unsigned OpSize = Ty.getSizeInBits();
- if (OpSize != 32 && OpSize != 64) {
- LLVM_DEBUG(
- dbgs()
- << "G_UADDO currently only supported for 32 and 64 b types.\n");
- return false;
- }
-
- // TODO: Support vectors.
- if (Ty.isVector()) {
- LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
- return false;
- }
-
- // Add and set the set condition flag.
- unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_USUBO: {
+ // Emit the operation and get the correct condition code.
MachineIRBuilder MIRBuilder(I);
- auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
- {I.getOperand(2), I.getOperand(3)});
- constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
+ auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
+ I.getOperand(2), I.getOperand(3), MIRBuilder);
// Now, put the overflow result in the register given by the first operand
- // to the G_UADDO. CSINC increments the result when the predicate is false,
- // so to get the increment when it's true, we need to use the inverse. In
- // this case, we want to increment when carry is set.
+ // to the overflow op. CSINC increments the result when the predicate is
+ // false, so to get the increment when it's true, we need to use the
+ // inverse. In this case, we want to increment when carry is set.
+ Register ZReg = AArch64::WZR;
auto CsetMI = MIRBuilder
.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {Register(AArch64::WZR), Register(AArch64::WZR)})
- .addImm(getInvertedCondCode(AArch64CC::HS));
+ {ZReg, ZReg})
+ .addImm(getInvertedCondCode(OpAndCC.second));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
@@ -2446,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
@@ -2737,22 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (tryOptSelect(I))
return true;
- Register CSelOpc = selectSelectOpc(I, MRI, RBI);
- MachineInstr &TstMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
- .addDef(AArch64::WZR)
- .addUse(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
-
- MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
- .addDef(I.getOperand(0).getReg())
- .addUse(TReg)
- .addUse(FReg)
- .addImm(AArch64CC::NE);
-
- constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
-
+ // Make sure to use an unused vreg instead of wzr, so that the peephole
+ // optimizations will be able to optimize these.
+ MachineIRBuilder MIB(I);
+ Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+ return false;
I.eraseFromParent();
return true;
}
@@ -2767,76 +3082,22 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
MachineIRBuilder MIRBuilder(I);
- MachineInstr *Cmp;
- CmpInst::Predicate Pred;
- std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
- I.getOperand(1), MIRBuilder);
- if (!Cmp)
- return false;
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
+ MIRBuilder);
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
- if (Ty != LLT::scalar(32)) {
- LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
- << ", expected: " << LLT::scalar(32) << '\n');
- return false;
- }
-
- unsigned CmpOpc = selectFCMPOpc(I, MRI);
- if (!CmpOpc)
+ MachineIRBuilder MIRBuilder(I);
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
+ MIRBuilder, Pred) ||
+ !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
return false;
-
- // FIXME: regbank
-
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(
- (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
-
- // Partially build the compare. Decide if we need to add a use for the
- // third operand based off whether or not we're comparing against 0.0.
- auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
- .addUse(I.getOperand(2).getReg());
-
- // If we don't have an immediate compare, then we need to add a use of the
- // register which wasn't used for the immediate.
- // Note that the immediate will always be the last operand.
- if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
- CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
-
- const Register DefReg = I.getOperand(0).getReg();
- Register Def1Reg = DefReg;
- if (CC2 != AArch64CC::AL)
- Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-
- MachineInstr &CSetMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
- .addDef(Def1Reg)
- .addUse(AArch64::WZR)
- .addUse(AArch64::WZR)
- .addImm(getInvertedCondCode(CC1));
-
- if (CC2 != AArch64CC::AL) {
- Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MachineInstr &CSet2MI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
- .addDef(Def2Reg)
- .addUse(AArch64::WZR)
- .addUse(AArch64::WZR)
- .addImm(getInvertedCondCode(CC2));
- MachineInstr &OrMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
- .addDef(DefReg)
- .addUse(Def1Reg)
- .addUse(Def2Reg);
- constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
- }
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
-
I.eraseFromParent();
return true;
}
@@ -2875,6 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
+ case AArch64::G_DUP: {
+ // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
+ // imported patterns. Do it manually here. Avoiding generating s16 gpr is
+ // difficult because at RBS we may end up pessimizing the fpr case if we
+ // decided to add an anyextend to fix this. Manual selection is the most
+ // robust solution for now.
+ Register SrcReg = I.getOperand(1).getReg();
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+ return false; // We expect the fpr regbank case to be imported.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.getSizeInBits() == 16)
+ I.setDesc(TII.get(AArch64::DUPv8i16gpr));
+ else if (SrcTy.getSizeInBits() == 8)
+ I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+ else
+ return false;
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
@@ -2895,8 +3174,49 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ return selectReduction(I, MRI);
+ }
+
+ return false;
+}
+
+bool AArch64InstructionSelector::selectReduction(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ Register VecReg = I.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(16, 8))
+ Opc = AArch64::ADDVv16i8v;
+ else if (VecTy == LLT::vector(8, 16))
+ Opc = AArch64::ADDVv8i16v;
+ else if (VecTy == LLT::vector(4, 32))
+ Opc = AArch64::ADDVv4i32v;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::ADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(2, 32))
+ Opc = AArch64::FADDPv2i32p;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::FADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
return false;
}
@@ -2910,6 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+
+ MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
@@ -2946,17 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
const GlobalValue &GV = *I.getOperand(1).getGlobal();
MachineIRBuilder MIB(I);
- MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
- .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
+ auto LoadGOT =
+ MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
+ .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
- {Register(AArch64::X0)})
+ {LoadGOT.getReg(0)})
.addImm(0);
+ MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
+ .addUse(AArch64::X0, RegState::Implicit)
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());
@@ -3442,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt(
(void)WideTy;
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!");
- assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
+ assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
// Need the lane index to determine the correct copy opcode.
MachineOperand &LaneIdxOp = I.getOperand(2);
@@ -3457,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value;
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
MachineIRBuilder MIRBuilder(I);
@@ -3680,7 +4005,10 @@ static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
- if (EltSize == 32) {
+ if (EltSize == 16) {
+ Opc = AArch64::INSvi16gpr;
+ SubregIdx = AArch64::ssub;
+ } else if (EltSize == 32) {
Opc = AArch64::INSvi32gpr;
SubregIdx = AArch64::ssub;
} else if (EltSize == 64) {
@@ -3709,135 +4037,223 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
return std::make_pair(Opc, SubregIdx);
}
+MachineInstr *AArch64InstructionSelector::emitInstr(
+ unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns) const {
+ assert(Opcode && "Expected an opcode?");
+ assert(!isPreISelGenericOpcode(Opcode) &&
+ "Function should only be used to produce selected instructions!");
+ auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
+ if (RenderFns)
+ for (auto &Fn : *RenderFns)
+ Fn(MI);
+ constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+ return &*MI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ auto Ty = MRI.getType(LHS.getReg());
+ assert(!Ty.isVector() && "Expected a scalar or pointer?");
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
+ bool Is32Bit = Size == 32;
+
+ // INSTRri form with positive arithmetic immediate.
+ if (auto Fns = selectArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRri form with negative arithmetic immediate.
+ if (auto Fns = selectNegArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrx form.
+ if (auto Fns = selectArithExtendedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrs form.
+ if (auto Fns = selectShiftedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+ return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
+ MIRBuilder);
+}
+
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
- MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
- {AArch64::ADDWrr, AArch64::ADDWri}};
- bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
- auto ImmFns = selectArithImmed(RHS);
- unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
- auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
-
- // If we matched a valid constant immediate, add those operands.
- if (ImmFns) {
- for (auto &RenderFn : *ImmFns)
- RenderFn(AddMI);
- } else {
- AddMI.addUse(RHS.getReg());
- }
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDXri, AArch64::ADDWri},
+ {AArch64::ADDXrs, AArch64::ADDWrs},
+ {AArch64::ADDXrr, AArch64::ADDWrr},
+ {AArch64::SUBXri, AArch64::SUBWri},
+ {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::ADDSXrs, AArch64::ADDSWrs},
+ {AArch64::ADDSXrr, AArch64::ADDSWrr},
+ {AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
+}
- constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
- return &*AddMI;
+MachineInstr *
+AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::SUBSXrs, AArch64::SUBSWrs},
+ {AArch64::SUBSXrr, AArch64::SUBSWrr},
+ {AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
- {AArch64::ADDSWrr, AArch64::ADDSWri}};
bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
- auto ImmFns = selectArithImmed(RHS);
- unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
-
- auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
-
- // If we matched a valid constant immediate, add those operands.
- if (ImmFns) {
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- } else {
- CmpMI.addUse(RHS.getReg());
- }
-
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
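+  // CMN is an ADDS whose result is unused, so emit the ADDS into a scratch
+  // virtual register of the appropriate width.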
+ auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+ return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
-AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
+AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- unsigned RegSize = MRI.getType(LHS).getSizeInBits();
+ LLT Ty = MRI.getType(LHS.getReg());
+ unsigned RegSize = Ty.getSizeInBits();
bool Is32Bit = (RegSize == 32);
- static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
- {AArch64::ANDSWrr, AArch64::ANDSWri}};
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
-
- // We might be able to fold in an immediate into the TST. We need to make sure
- // it's a logical immediate though, since ANDS requires that.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
- bool IsImmForm = ValAndVReg.hasValue() &&
- AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
- unsigned Opc = OpcTable[Is32Bit][IsImmForm];
- auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
-
- if (IsImmForm)
- TstMI.addImm(
- AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
- else
- TstMI.addUse(RHS);
+ const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
+ {AArch64::ANDSXrs, AArch64::ANDSWrs},
+ {AArch64::ANDSXrr, AArch64::ANDSWrr}};
+ // ANDS needs a logical immediate for its immediate form. Check if we can
+ // fold one in.
+ if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ int64_t Imm = ValAndVReg->Value.getSExtValue();
+
+ if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
+ auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
+ TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ return &*TstMI;
+ }
+ }
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- return &*TstMI;
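+  // No logical immediate; try to fold in a shifted register before falling
+  // back to the plain register-register ANDS form.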
+ if (auto Fns = selectLogicalShiftedRegister(RHS))
+ return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
+ return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
-std::pair<MachineInstr *, CmpInst::Predicate>
-AArch64InstructionSelector::emitIntegerCompare(
+MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ LLT CmpTy = MRI.getType(LHS.getReg());
+ assert(!CmpTy.isVector() && "Expected scalar or pointer");
+ unsigned Size = CmpTy.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
+ // Fold the compare into a cmn or tst if possible.
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
+ return FoldCmp;
+ auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
+ return emitSUBS(Dst, LHS, RHS, MIRBuilder);
+}
- CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
-
- // Fold the compare if possible.
- MachineInstr *FoldCmp =
- tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
- if (FoldCmp)
- return {FoldCmp, P};
+MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
+ Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+#ifndef NDEBUG
+ LLT Ty = MRI.getType(Dst);
+ assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
+ "Expected a 32-bit scalar register?");
+#endif
+ const Register ZeroReg = AArch64::WZR;
+ auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
+ auto CSet =
+ MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
+ .addImm(getInvertedCondCode(CC));
+ constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
+ return &*CSet;
+ };
- // Can't fold into a CMN. Just emit a normal compare.
- unsigned CmpOpc = 0;
- Register ZReg;
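+  // Some FP predicates need two AArch64 condition codes. In that case, emit
+  // a CSET for each one and OR the results together.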
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ if (CC2 == AArch64CC::AL)
+ return EmitCSet(Dst, CC1);
+
+ const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
+ Register Def1Reg = MRI.createVirtualRegister(RC);
+ Register Def2Reg = MRI.createVirtualRegister(RC);
+ EmitCSet(Def1Reg, CC1);
+ EmitCSet(Def2Reg, CC2);
+ auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
+ constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
+ return &*OrMI;
+}
- LLT CmpTy = MRI.getType(LHS.getReg());
- assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
- "Expected scalar or pointer");
- if (CmpTy == LLT::scalar(32)) {
- CmpOpc = AArch64::SUBSWrr;
- ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
- CmpOpc = AArch64::SUBSXrr;
- ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- } else {
- return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
- }
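+// Emit a scalar floating point compare between LHS and RHS, using the
+// compare-against-zero form when RHS (or, for equality predicates, either
+// operand) is the constant +0.0.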
+MachineInstr *
+AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> Pred) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ LLT Ty = MRI.getType(LHS);
+ if (Ty.isVector())
+ return nullptr;
+ unsigned OpSize = Ty.getSizeInBits();
+ if (OpSize != 32 && OpSize != 64)
+ return nullptr;
- // Try to match immediate forms.
- MachineInstr *ImmedCmp =
- tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
- if (ImmedCmp)
- return {ImmedCmp, P};
+ // If this is a compare against +0.0, then we don't have
+ // to explicitly materialize a constant.
+ const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
+ bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
- // If we don't have an immediate, we may have a shift which can be folded
- // into the compare.
- MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
- if (ShiftedCmp)
- return {ShiftedCmp, P};
+ auto IsEqualityPred = [](CmpInst::Predicate P) {
+ return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
+ P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
+ };
+ if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
+ // Try commutating the operands.
+ const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
+ if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
+ ShouldUseImm = true;
+ std::swap(LHS, RHS);
+ }
+ }
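+  // Pick FCMPS vs FCMPD from the operand size, and the 'ri' (compare against
+  // #0.0) form when no second register use is needed.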
+ unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+ {AArch64::FCMPSri, AArch64::FCMPDri}};
+ unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
- auto CmpMI =
- MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
- // Make sure that we can constrain the compare that we emitted.
+ // Partially build the compare. Decide if we need to add a use for the
+  // third operand based on whether or not we're comparing against 0.0.
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
+ if (!ShouldUseImm)
+ CmpMI.addUse(RHS);
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return {&*CmpMI, P};
+ return &*CmpMI;
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -3947,11 +4363,28 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
return &*I;
}
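+// Emit the flag-setting add/sub for an overflow operation, returning it
+// together with the condition code which is true when the operation
+// overflows.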
+std::pair<MachineInstr *, AArch64CC::CondCode>
+AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
+ MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_UADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+ case TargetOpcode::G_SSUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_USUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+ }
+}
+
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
MachineIRBuilder MIB(I);
MachineRegisterInfo &MRI = *MIB.getMRI();
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-
// We want to recognize this pattern:
//
// $z = G_FCMP pred, $x, $y
@@ -4008,27 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
- MachineInstr *Cmp;
- CmpInst::Predicate Pred;
-
- std::tie(Cmp, Pred) =
- emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
- CondDef->getOperand(1), MIB);
-
- if (!Cmp) {
- LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
- return false;
- }
-
- // Have to collect the CondCode after emitIntegerCompare, since it can
- // update the predicate.
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
CondCode = changeICMPPredToAArch64CC(Pred);
+ emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+ CondDef->getOperand(1), MIB);
} else {
// Get the condition code for the select.
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
AArch64CC::CondCode CondCode2;
- changeFCMPPredToAArch64CC(
- (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
- CondCode2);
+ changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
// instructions to emit the comparison.
@@ -4037,25 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
if (CondCode2 != AArch64CC::AL)
return false;
- // Make sure we'll be able to select the compare.
- unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
- if (!CmpOpc)
+ if (!emitFPCompare(CondDef->getOperand(2).getReg(),
+ CondDef->getOperand(3).getReg(), MIB)) {
+ LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false;
-
- // Emit a new compare.
- auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
- if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
- Cmp.addUse(CondDef->getOperand(3).getReg());
- constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+ }
}
// Emit the select.
- unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
- auto CSel =
- MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
- {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
- .addImm(CondCode);
- constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
+ emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
+ I.getOperand(3).getReg(), CondCode, MIB);
I.eraseFromParent();
return true;
}
@@ -4138,162 +4552,20 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this if the compare is signed:
//
// tst x, y
- if (!isUnsignedICMPPred(P) && LHSDef &&
+ if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
- return emitTST(LHSDef->getOperand(1).getReg(),
- LHSDef->getOperand(2).getReg(), MIRBuilder);
+ return emitTST(LHSDef->getOperand(1),
+ LHSDef->getOperand(2), MIRBuilder);
}
return nullptr;
}
-MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
- MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
- MachineIRBuilder &MIB) const {
- // Attempt to select the immediate form of an integer compare.
- MachineRegisterInfo &MRI = *MIB.getMRI();
- auto Ty = MRI.getType(LHS.getReg());
- assert(!Ty.isVector() && "Expected scalar or pointer only?");
- unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) &&
- "Expected 32 bit or 64 bit compare only?");
-
- // Check if this is a case we can already handle.
- InstructionSelector::ComplexRendererFns ImmFns;
- ImmFns = selectArithImmed(RHS);
-
- if (!ImmFns) {
- // We didn't get a rendering function, but we may still have a constant.
- auto MaybeImmed = getImmedFromMO(RHS);
- if (!MaybeImmed)
- return nullptr;
-
- // We have a constant, but it doesn't fit. Try adjusting it by one and
- // updating the predicate if possible.
- uint64_t C = *MaybeImmed;
- CmpInst::Predicate NewP;
- switch (P) {
- default:
- return nullptr;
- case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SGE:
- // Check for
- //
- // x slt c => x sle c - 1
- // x sge c => x sgt c - 1
- //
- // When c is not the smallest possible negative number.
- if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
- (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
- return nullptr;
- NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
- C -= 1;
- break;
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_UGE:
- // Check for
- //
- // x ult c => x ule c - 1
- // x uge c => x ugt c - 1
- //
- // When c is not zero.
- if (C == 0)
- return nullptr;
- NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
- C -= 1;
- break;
- case CmpInst::ICMP_SLE:
- case CmpInst::ICMP_SGT:
- // Check for
- //
- // x sle c => x slt c + 1
- // x sgt c => s sge c + 1
- //
- // When c is not the largest possible signed integer.
- if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
- (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
- return nullptr;
- NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
- C += 1;
- break;
- case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_UGT:
- // Check for
- //
- // x ule c => x ult c + 1
- // x ugt c => s uge c + 1
- //
- // When c is not the largest possible unsigned integer.
- if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
- (Size == 64 && C == UINT64_MAX))
- return nullptr;
- NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
- C += 1;
- break;
- }
-
- // Check if the new constant is valid.
- if (Size == 32)
- C = static_cast<uint32_t>(C);
- ImmFns = select12BitValueWithLeftShift(C);
- if (!ImmFns)
- return nullptr;
- P = NewP;
- }
-
- // At this point, we know we can select an immediate form. Go ahead and do
- // that.
- Register ZReg;
- unsigned Opc;
- if (Size == 32) {
- ZReg = AArch64::WZR;
- Opc = AArch64::SUBSWri;
- } else {
- ZReg = AArch64::XZR;
- Opc = AArch64::SUBSXri;
- }
-
- auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
-}
-
-MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
- MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
- // We are looking for the following pattern:
- //
- // shift = G_SHL/ASHR/LHSR y, c
- // ...
- // cmp = G_ICMP pred, something, shift
- //
- // Since we will select the G_ICMP to a SUBS, we can potentially fold the
- // shift into the subtract.
- static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
- static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
- auto ImmFns = selectShiftedRegister(RHS);
- if (!ImmFns)
- return nullptr;
- MachineRegisterInfo &MRI = *MIB.getMRI();
- auto Ty = MRI.getType(LHS.getReg());
- assert(!Ty.isVector() && "Expected scalar or pointer only?");
- unsigned Size = Ty.getSizeInBits();
- bool Idx = (Size == 64);
- Register ZReg = ZRegTable[Idx];
- unsigned Opc = OpcTable[Idx];
- auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
-}
-
bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -4436,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value;
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
// Perform the lane insert.
Register SrcReg = I.getOperand(1).getReg();
@@ -4493,8 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt(
bool AArch64InstructionSelector::tryOptConstantBuildVec(
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
- if (DstTy.getSizeInBits() < 32)
+ unsigned DstSize = DstTy.getSizeInBits();
+ assert(DstSize <= 128 && "Unexpected build_vec type!");
+ if (DstSize < 32)
return false;
// Check if we're building a constant vector, in which case we want to
// generate a constant pool load instead of a vector insert sequence.
@@ -4515,6 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
}
Constant *CV = ConstantVector::get(Csts);
MachineIRBuilder MIB(I);
+ if (CV->isNullValue()) {
+ // Until the importer can support immAllZerosV in pattern leaf nodes,
+ // select a zero move manually here.
+ Register DstReg = I.getOperand(0).getReg();
+ if (DstSize == 128) {
+ auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ } else if (DstSize == 64) {
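+      // MOVIv2d_ns only writes a 128-bit register, so build the zero in an
+      // FPR128 and copy the low 64 bits out through the dsub subregister.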
+ auto Mov =
+ MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
+ .addImm(0);
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(Mov.getReg(0), 0, AArch64::dsub);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
+ }
+ }
auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
if (!CPLoad) {
LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
@@ -4634,10 +4925,12 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
break;
case Intrinsic::debugtrap:
- if (!STI.isTargetWindows())
- return false;
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
+ case Intrinsic::ubsantrap:
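+    // The BRK immediate carries the UBSan check kind in its low byte, tagged
+    // with 'U' in the high byte.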
+ MIRBuilder.buildInstr(AArch64::BRK, {}, {})
+ .addImm(I.getOperand(1).getImm() | ('U' << 8));
+ break;
}
I.eraseFromParent();
@@ -4703,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
- if (MFReturnAddr) {
- MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
- I.eraseFromParent();
- return true;
+ if (!MFReturnAddr) {
+ // Insert the copy from LR/X30 into the entry block, before it can be
+ // clobbered by anything.
+ MFI.setReturnAddressIsTaken(true);
+ MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
+ AArch64::GPR64RegClass);
}
- MFI.setReturnAddressIsTaken(true);
- MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
- // Insert the copy from LR/X30 into the entry block, before it can be
- // clobbered by anything.
- MachineBasicBlock &EntryBlock = *MF.begin();
- if (!EntryBlock.isLiveIn(AArch64::LR))
- EntryBlock.addLiveIn(AArch64::LR);
- MachineIRBuilder EntryBuilder(MF);
- EntryBuilder.setInstr(*EntryBlock.begin());
- EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
- MFReturnAddr = DstReg;
+
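+    // The return address may be signed; strip any pointer authentication
+    // code. With PAuth, XPACI can operate on DstReg directly; otherwise go
+    // through LR and the hint-encoded XPACLRI.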
+ if (STI.hasPAuth()) {
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
+ } else {
+ MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
+
I.eraseFromParent();
return true;
}
@@ -4738,7 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MIRBuilder.buildCopy({DstReg}, {FrameAddr});
else {
MFI.setReturnAddressIsTaken(true);
- MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
+
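+      // Strip any pointer authentication code from the return address loaded
+      // out of the frame record before handing it back.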
+ if (STI.hasPAuth()) {
+ Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
+ } else {
+        MIRBuilder
+            .buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
+            .addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
}
I.eraseFromParent();
@@ -4946,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
- int64_t ImmVal = ValAndVReg->Value;
+ int64_t ImmVal = ValAndVReg->Value.getSExtValue();
// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
@@ -5086,12 +5388,60 @@ InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-
- // If we have a constant offset, then we probably don't want to match a
- // register offset.
- if (isBaseWithConstantOffset(Root, MRI))
+ if (!Root.isReg())
+ return None;
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd)
return None;
+  // Check for immediates which cannot be encoded in the [base + imm]
+ // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
+ // end up with code like:
+ //
+ // mov x0, wide
+  // add x1, base, x0
+ // ldr x2, [x1, x0]
+ //
+ // In this situation, we can use the [base, xreg] addressing mode to save an
+ // add/sub:
+ //
+ // mov x0, wide
+ // ldr x2, [base, x0]
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ if (ValAndVReg) {
+ unsigned Scale = Log2_32(SizeInBytes);
+ int64_t ImmOff = ValAndVReg->Value.getSExtValue();
+
+    // Skip immediates that can be selected in the load/store addressing
+ // mode.
+ if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+ ImmOff < (0x1000 << Scale))
+ return None;
+
+ // Helper lambda to decide whether or not it is preferable to emit an add.
+ auto isPreferredADD = [](int64_t ImmOff) {
+ // Constants in [0x0, 0xfff] can be encoded in an add.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+
+ // Can it be encoded in an add lsl #12?
+ if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+ return false;
+
+ // It can be encoded in an add lsl #12, but we may not want to. If it is
+ // possible to select this as a single movz, then prefer that. A single
+ // movz is faster than an add with a shift.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ };
+
+ // If the immediate can be encoded in a single add/sub, then bail out.
+ if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return None;
+ }
+
// Try to fold shifts into the addressing mode.
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
if (AddrModeFns)
@@ -5521,7 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
- Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+ Optional<int64_t> CstVal =
+ getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}