Diffstat (limited to 'llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp  90
1 file changed, 61 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 322cb6f6f581..a44c583a1ca5 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -67,15 +67,9 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
// The mask is constant or extended from a bool vector. Convert this x86
// intrinsic to the LLVM intrinsic to allow target-independent optimizations.
if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
- // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
- // the LLVM intrinsic definition for the pointer argument.
- unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
- PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
- Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
-
// The pass-through vector for an x86 masked load is a zero vector.
CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
- II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
+ II.getType(), Ptr, Align(1), BoolMask, ZeroVec);
return IC.replaceInstUsesWith(II, NewMaskedLoad);
}
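The rewritten call passes the original scalar pointer straight through: with opaque pointers there is no separate vector-pointer type to cast to, so the old "castvec" bitcast was dead weight. A minimal sketch of the new call shape, as a hypothetical helper (not the in-tree code) with the same values in scope as above:

// Sketch: emit the target-independent masked load that replaces the
// x86 masked-load intrinsic.
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static CallInst *emitGenericMaskedLoad(IRBuilder<> &Builder, Type *VecTy,
                                       Value *Ptr, Value *BoolMask,
                                       Value *ZeroVec) {
  // llvm.masked.load takes the result type, pointer, alignment,
  // <N x i1> mask, and pass-through. x86 masked loads zero the
  // inactive lanes, so the pass-through is a zero vector.
  return Builder.CreateMaskedLoad(VecTy, Ptr, Align(1), BoolMask, ZeroVec);
}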
@@ -105,7 +99,7 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
// intrinsic to the LLVM intrinsic to allow target-independent optimizations.
if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
- PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
+ PointerType *VecPtrTy = PointerType::get(Vec->getContext(), AddrSpace);
Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
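The store path keeps its bitcast for now but switches to the context-only PointerType::get overload: under opaque pointers a pointer type is just an address space, so a pointee type argument carries no information. A small sketch of that distinction, assuming a post-opaque-pointer LLVM:

// Sketch: with opaque pointers, every pointer in address space AS has
// the single type `ptr addrspace(AS)`, regardless of pointee.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static PointerType *opaquePtrTy(LLVMContext &Ctx, unsigned AddrSpace) {
  return PointerType::get(Ctx, AddrSpace); // no pointee type needed
}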
@@ -623,11 +617,13 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
if (isa<UndefValue>(Arg))
return Constant::getNullValue(ResTy);
- auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
- // We can't easily peek through x86_mmx types.
- if (!ArgTy)
+ // Preserve previous behavior and give up.
+ // TODO: treat as <8 x i8>.
+ if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb)
return nullptr;
+ auto *ArgTy = cast<FixedVectorType>(Arg->getType());
+
// Expand MOVMSK to compare/bitcast/zext:
// e.g. PMOVMSKB(v16i8 x):
// %cmp = icmp slt <16 x i8> %x, zeroinitializer
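For the non-MMX cases the expansion sketched in the comment is the whole trick: MOVMSK packs the per-element sign bits into the low bits of a scalar result. A scalar model of the v16i8 PMOVMSKB case, as a hypothetical reference implementation:

// Scalar model of PMOVMSKB on a 16-byte vector: bit I of the result is
// the sign bit of byte I, matching the icmp slt / bitcast <16 x i1> /
// zext sequence described above.
#include <cstdint>

static uint32_t pmovmskb128(const int8_t V[16]) {
  uint32_t Mask = 0;
  for (int I = 0; I < 16; ++I)
    Mask |= uint32_t(V[I] < 0) << I;
  return Mask;
}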
@@ -1873,9 +1869,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
// If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
Value *Args[] = {Op0, CILength, CIIndex};
- Module *M = II.getModule();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
- return Builder.CreateCall(F, Args);
+ return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_extrqi, {}, Args);
}
}
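CreateIntrinsic folds the explicit getDeclaration/CreateCall pair into one call; the empty braces are the list of overloaded type parameters, which the non-overloaded SSE4A intrinsics do not have. Roughly, old versus new, in a hypothetical side-by-side (not in-tree code):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static CallInst *emitExtrqiOld(IRBuilder<> &B, Module *M,
                               ArrayRef<Value *> Args) {
  Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
  return B.CreateCall(F, Args);
}

static CallInst *emitExtrqiNew(IRBuilder<> &B, ArrayRef<Value *> Args) {
  // CreateIntrinsic performs the declaration lookup internally.
  return B.CreateIntrinsic(Intrinsic::x86_sse4a_extrqi, /*Types=*/{}, Args);
}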
@@ -1972,9 +1966,7 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
Value *Args[] = {Op0, Op1, CILength, CIIndex};
- Module *M = II.getModule();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
- return Builder.CreateCall(F, Args);
+ return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_insertqi, {}, Args);
}
return nullptr;
@@ -2142,6 +2134,22 @@ static Value *simplifyX86vpermv3(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, Size));
}
+// Simplify VPERMV/VPERMV3 mask - only demand the active index bits.
+static bool simplifyX86VPERMMask(Instruction *II, bool IsBinary,
+ InstCombiner &IC) {
+ auto *VecTy = cast<FixedVectorType>(II->getType());
+ unsigned EltSizeInBits = VecTy->getScalarSizeInBits();
+ unsigned NumElts = VecTy->getNumElements();
+ assert(isPowerOf2_32(NumElts) && isPowerOf2_32(EltSizeInBits) &&
+ "Unexpected shuffle mask size");
+
+ unsigned IdxSizeInBits = Log2_32(IsBinary ? (2 * NumElts) : NumElts);
+ APInt DemandedMask = APInt::getLowBitsSet(EltSizeInBits, IdxSizeInBits);
+
+ KnownBits KnownMask(EltSizeInBits);
+ return IC.SimplifyDemandedBits(II, /*OpNo=*/1, DemandedMask, KnownMask);
+}
+
std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
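The demanded mask computed by the simplifyX86VPERMMask helper above is just enough low bits to index every source element: N lanes need log2(N) bits, and the three-operand VPERMV3 form indexes across both sources, hence log2(2N). A worked example for VPERMD (v8i32, single source), under the same assumptions:

// Worked example of the index math: VPERMD shuffles 8 x i32, so each
// 32-bit mask element only needs its low log2(8) = 3 bits.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

static APInt vpermDemandedBits(unsigned EltSizeInBits, unsigned NumElts,
                               bool IsBinary) {
  unsigned IdxSizeInBits = Log2_32(IsBinary ? 2 * NumElts : NumElts);
  return APInt::getLowBitsSet(EltSizeInBits, IdxSizeInBits);
}

// vpermDemandedBits(32, 8, false) == 0b111: the other 29 bits of each
// mask element never affect the shuffle and can be simplified away.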
@@ -2948,22 +2956,42 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
- case Intrinsic::x86_avx512_pshuf_b_512:
+ case Intrinsic::x86_avx512_pshuf_b_512: {
if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+
+ KnownBits KnownMask(8);
+ if (IC.SimplifyDemandedBits(&II, 1, APInt(8, 0b10001111), KnownMask))
+ return &II;
break;
+ }
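The 0b10001111 (0x8F) demanded mask mirrors the PSHUFB semantics: bit 7 of each control byte zeroes the destination byte, bits 3:0 select a source byte within the 128-bit lane, and bits 6:4 are ignored. A scalar model of one lane, for reference:

// Scalar model of one 128-bit PSHUFB lane: only mask bits 0x8F matter.
#include <cstdint>

static void pshufb128(const uint8_t Src[16], const uint8_t Ctl[16],
                      uint8_t Dst[16]) {
  for (int I = 0; I < 16; ++I)
    Dst[I] = (Ctl[I] & 0x80) ? 0              // bit 7 set: zero the byte
                             : Src[Ctl[I] & 0x0F]; // bits 3:0: source index
}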
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
- case Intrinsic::x86_avx512_vpermilvar_ps_512:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512: {
+ if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+
+ KnownBits KnownMask(32);
+ if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
+ return &II;
+ break;
+ }
+
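VPERMILPS selects within each 128-bit lane of four floats, so only the low two bits (0b00011) of each 32-bit control element can affect the result. A scalar model of one lane:

// Scalar model of one 128-bit VPERMILPS lane: 4 candidates per lane,
// so 2 control bits per element.
#include <cstdint>

static void vpermilps128(const float Src[4], const uint32_t Ctl[4],
                         float Dst[4]) {
  for (int I = 0; I < 4; ++I)
    Dst[I] = Src[Ctl[I] & 0x3];
}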
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
- case Intrinsic::x86_avx512_vpermilvar_pd_512:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+
+ KnownBits KnownMask(64);
+ if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
+ return &II;
break;
+ }
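The PD variant is the one quirk: the hardware reads the selector from bit 1 of each 64-bit control element, not bit 0, which is why the demanded mask is 0b00010 rather than 0b00001. A scalar model of one lane:

// Scalar model of one 128-bit VPERMILPD lane: the element selector is
// control bit 1 (hence demanded mask 0b00010); bit 0 is ignored.
#include <cstdint>

static void vpermilpd128(const double Src[2], const uint64_t Ctl[2],
                         double Dst[2]) {
  for (int I = 0; I < 2; ++I)
    Dst[I] = Src[(Ctl[I] >> 1) & 0x1];
}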
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
@@ -2982,20 +3010,22 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+ if (simplifyX86VPERMMask(&II, /*IsBinary=*/false, IC))
+ return &II;
break;
case Intrinsic::x86_avx512_vpermi2var_d_128:
case Intrinsic::x86_avx512_vpermi2var_d_256:
case Intrinsic::x86_avx512_vpermi2var_d_512:
- case Intrinsic::x86_avx512_vpermi2var_hi_128:
- case Intrinsic::x86_avx512_vpermi2var_hi_256:
- case Intrinsic::x86_avx512_vpermi2var_hi_512:
- case Intrinsic::x86_avx512_vpermi2var_pd_128:
- case Intrinsic::x86_avx512_vpermi2var_pd_256:
- case Intrinsic::x86_avx512_vpermi2var_pd_512:
- case Intrinsic::x86_avx512_vpermi2var_ps_128:
- case Intrinsic::x86_avx512_vpermi2var_ps_256:
- case Intrinsic::x86_avx512_vpermi2var_ps_512:
+ case Intrinsic::x86_avx512_vpermi2var_hi_128:
+ case Intrinsic::x86_avx512_vpermi2var_hi_256:
+ case Intrinsic::x86_avx512_vpermi2var_hi_512:
+ case Intrinsic::x86_avx512_vpermi2var_pd_128:
+ case Intrinsic::x86_avx512_vpermi2var_pd_256:
+ case Intrinsic::x86_avx512_vpermi2var_pd_512:
+ case Intrinsic::x86_avx512_vpermi2var_ps_128:
+ case Intrinsic::x86_avx512_vpermi2var_ps_256:
+ case Intrinsic::x86_avx512_vpermi2var_ps_512:
case Intrinsic::x86_avx512_vpermi2var_q_128:
case Intrinsic::x86_avx512_vpermi2var_q_256:
case Intrinsic::x86_avx512_vpermi2var_q_512:
@@ -3005,6 +3035,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
+ if (simplifyX86VPERMMask(&II, /*IsBinary=*/true, IC))
+ return &II;
break;
case Intrinsic::x86_avx_maskload_ps: