author      Dimitry Andric <dim@FreeBSD.org>        2015-12-30 11:46:15 +0000
committer   Dimitry Andric <dim@FreeBSD.org>        2015-12-30 11:46:15 +0000
commit      dd58ef019b700900793a1eb48b52123db01b654e (patch)
tree        fcfbb4df56a744f4ddc6122c50521dd3f1c5e196 /lib/Target/X86/X86FastISel.cpp
parent      2fe5752e3a7c345cdb59e869278d36af33c13fa4 (diff)
download    src-dd58ef019b700900793a1eb48b52123db01b654e.tar.gz
            src-dd58ef019b700900793a1eb48b52123db01b654e.zip
Vendor import of llvm trunk r256633:
Notes:
    svn path=/vendor/llvm/dist/; revision=292915
Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r--   lib/Target/X86/X86FastISel.cpp   168
1 file changed, 102 insertions(+), 66 deletions(-)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b4319c8bb04f..de94a138d865 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -298,8 +298,8 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
return false;
// Make sure nothing is in the way
- BasicBlock::const_iterator Start = I;
- BasicBlock::const_iterator End = II;
+ BasicBlock::const_iterator Start(I);
+ BasicBlock::const_iterator End(II);
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
// We only expect extractvalue instructions between the intrinsic and the
// instruction to be selected.
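A side note on the loop this hunk touches: it starts at std::prev(Start), i.e. just before the instruction being selected, and walks backwards until it reaches the intrinsic, bailing out if anything other than an extractvalue sits in between. A stand-alone sketch of that check, using plain strings in place of the LLVM Instruction hierarchy (all names and indices below are illustrative, not from the patch):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Returns true when only extractvalue instructions sit strictly between the
    // intrinsic (at intrinsicIdx) and the instruction being selected (at instIdx),
    // walking backwards the same way the loop above does.
    bool onlyExtractValuesBetween(const std::vector<std::string> &block,
                                  std::size_t intrinsicIdx, std::size_t instIdx) {
      for (std::size_t i = instIdx; i-- > intrinsicIdx + 1;)
        if (block[i] != "extractvalue")
          return false;
      return true;
    }

    int main() {
      std::vector<std::string> bb = {"call llvm.sadd.with.overflow", "extractvalue",
                                     "extractvalue", "br"};
      std::cout << onlyExtractValuesBetween(bb, 0, 3) << "\n"; // prints 1
    }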
@@ -433,6 +433,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasSSE4A = Subtarget->hasSSE4A();
+ bool HasAVX = Subtarget->hasAVX();
+ bool IsNonTemporal = MMO && MMO->isNonTemporal();
+
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -449,35 +454,59 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
- case MVT::i32: Opc = X86::MOV32mr; break;
- case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::i32:
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
+ break;
case MVT::f32:
- Opc = X86ScalarSSEf32 ?
- (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSS;
+ else
+ Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ } else
+ Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = X86ScalarSSEf64 ?
- (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ if (X86ScalarSSEf64) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSD;
+ else
+ Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ } else
+ Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
- else
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ else
+ Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ } else
Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
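The new HasSSE2/HasSSE4A/HasAVX/IsNonTemporal flags let the switch above pick a non-temporal store opcode (MOVNTI, MOVNTSS/MOVNTSD, MOVNTPS/MOVNTPD/MOVNTDQ) when the memory operand is marked non-temporal and the subtarget supports it. A self-contained sketch of just the f32 leg of that decision, with plain enums and bools standing in for the subtarget and opcode types (illustrative only, not the LLVM API):

    #include <cstdio>

    // Stand-ins for a few of the opcode names used in the hunk above.
    enum class Opc { ST_Fp32m, MOVSSmr, VMOVSSmr, MOVNTSS };

    // Mirrors the MVT::f32 case: x87 fallback without scalar SSE, SSE4A's
    // non-temporal scalar store when requested and available, otherwise a
    // plain (V)MOVSS store.
    Opc selectF32StoreOpc(bool scalarSSEf32, bool nonTemporal, bool hasSSE4A,
                          bool hasAVX) {
      if (!scalarSSEf32)
        return Opc::ST_Fp32m;
      if (nonTemporal && hasSSE4A)
        return Opc::MOVNTSS;
      return hasAVX ? Opc::VMOVSSmr : Opc::MOVSSmr;
    }

    int main() {
      // A non-temporal f32 store on an SSE4A-capable core picks MOVNTSS.
      printf("%d\n", static_cast<int>(selectF32StoreOpc(true, true, true, false)));
    }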
@@ -1069,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax. We also do the same with %eax for Win32.
- if (F.hasStructRetAttr() &&
- (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+ // All x86 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -1431,17 +1459,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addMBB(TrueMBB);
}
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
-
- // Emits an unconditional branch to the FalseBB, obtains the branch
- // weight, and adds it to the successor list.
- fastEmitBranch(FalseMBB, DbgLoc);
-
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1472,12 +1490,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
}
@@ -1492,12 +1506,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -1511,12 +1520,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
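All four branch-selection paths above now defer to FastISel's finishCondBranch helper instead of repeating the successor/weight bookkeeping by hand. As a rough conceptual stand-in (not the LLVM API; the real helper works on MachineBasicBlocks and edge probabilities), the shared pattern the deleted snippets spelled out looks like:

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Block { std::vector<std::pair<Block *, uint32_t>> succs; };

    // Records TrueBB as a successor with its weight/probability and then falls
    // through to FalseBB -- the two steps each deleted snippet performed by hand.
    void finishCondBranchSketch(Block &cur, Block &trueBB, Block &falseBB,
                                uint32_t trueWeight) {
      cur.succs.push_back({&trueBB, trueWeight});
      cur.succs.push_back({&falseBB, 0}); // fall-through branch to FalseBB
    }

    int main() {
      Block entry, thenBB, elseBB;
      finishCondBranchSketch(entry, thenBB, elseBB, 100);
      return static_cast<int>(entry.succs.size()) - 2; // 0
    }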
@@ -1945,6 +1949,9 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned ResultReg;
if (Subtarget->hasAVX()) {
+ const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
+
// If we have AVX, create 1 blendv instead of 3 logic instructions.
// Blendv was introduced with SSE 4.1, but the 2 register form implicitly
// uses XMM0 as the selection register. That may need just as many
@@ -1955,10 +1962,13 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned BlendOpcode =
(RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
- LHSReg, LHSIsKill, CmpReg, true);
+ unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
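The AVX path above now issues the compare into an FR32 register, the blend into a full VR128, and then copies into the result class, but the underlying select idea is unchanged: without AVX the select is assembled from compare + AND/ANDN/OR, while with AVX a single blendv picks lanes by the sign bit of the compare mask. A one-lane scalar sketch of those two equivalent formulations (purely illustrative):

    #include <cstdint>
    #include <cstdio>

    // Non-AVX shape: lanes where the mask is all-ones come from lhs, the rest from rhs.
    uint32_t selectWithLogic(uint32_t mask, uint32_t lhs, uint32_t rhs) {
      return (lhs & mask) | (rhs & ~mask); // AND / ANDN / OR
    }

    // AVX blendv shape: the choice is keyed off the sign bit of the mask lane.
    uint32_t selectWithBlendv(uint32_t mask, uint32_t lhs, uint32_t rhs) {
      return (mask & 0x80000000u) ? lhs : rhs;
    }

    int main() {
      uint32_t allOnes = 0xFFFFFFFFu; // what a "true" compare lane looks like
      printf("%u %u\n", selectWithLogic(allOnes, 7, 9),
             selectWithBlendv(allOnes, 7, 9)); // both print 7
    }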
@@ -2806,10 +2816,12 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE)
return 0;
- if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
- return 0;
- if (CS && CS->paramHasAttr(1, Attribute::InReg))
- return 0;
+
+ if (CS)
+ if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
+ CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+ return 0;
+
return 4;
}
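Restated in plain terms, the rewritten guard above says: when call-site information is available, the callee only pops the 4-byte sret pointer if the call actually has arguments, the first one carries the sret attribute, it is not passed in a register, and the target is not MCU. A stand-alone restatement with booleans in place of the CallSite queries (names are illustrative, not the LLVM API):

    #include <cstdio>

    unsigned bytesPoppedByCallee(bool fastLikeCC, bool hasArgs, bool firstArgIsSRet,
                                 bool firstArgInReg, bool isMCU) {
      if (fastLikeCC) // Fast / GHC / HiPE conventions never do callee-pop here
        return 0;
      if (!hasArgs || !firstArgIsSRet || firstArgInReg || isMCU)
        return 0;
      return 4; // e.g. a "ret $4" that pops the hidden sret pointer
    }

    int main() {
      printf("%u\n", bytesPoppedByCallee(false, true, true, false, false)); // 4
    }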
@@ -2924,7 +2936,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
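Switching from getNextStackOffset() to getAlignedCallFrameSize() means the CALLSEQ_START issued here is fed an argument-area size already rounded up to the required stack alignment, rather than the raw running offset (that rounding behaviour is my reading of the helper, not something stated in this diff). The arithmetic involved is just an align-up:

    #include <cstdio>

    // Round bytes up to the next multiple of align (the real stack-alignment case
    // uses a power-of-two align; plain arithmetic is used here).
    unsigned alignTo(unsigned bytes, unsigned align) {
      return (bytes + align - 1) / align * align;
    }

    int main() {
      printf("%u\n", alignTo(20, 16)); // 20 bytes of outgoing args -> 32-byte frame
    }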
@@ -3020,8 +3032,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
- ArgVT.getStoreSize(), Alignment);
+ MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
if (Flags.isByVal()) {
X86AddressMode SrcAM;
SrcAM.Base.Reg = ArgReg;
@@ -3252,6 +3264,30 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
updateValueMap(I, Reg);
return true;
}
+ case Instruction::BitCast: {
+ // Select SSE2/AVX bitcasts between 128/256 bit vector types.
+ if (!Subtarget->hasSSE2())
+ return false;
+
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (!SrcVT.isSimple() || !DstVT.isSimple())
+ return false;
+
+ if (!SrcVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ return false;
+
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+
+ // No instruction is needed for conversion. Reuse the register used by
+ // the first operand.
+ updateValueMap(I, Reg);
+ return true;
+ }
}
return false;
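The new BitCast case works because a vector-to-vector bitcast of the same width only reinterprets the bits already sitting in the register, so FastISel can simply map the result to the operand's register. A plain-C++ analogue of bitcasting <4 x i32> to <2 x i64> (illustrative only):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint32_t v4i32[4] = {1, 2, 3, 4};
      uint64_t v2i64[2];
      std::memcpy(v2i64, v4i32, sizeof(v4i32)); // same 128 bits, new interpretation
      printf("%llu %llu\n", (unsigned long long)v2i64[0],
             (unsigned long long)v2i64[1]);
    }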
@@ -3384,8 +3420,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
TII.get(Opc), ResultReg);
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- DL.getPointerSize(), Align);
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}