Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 306
1 file changed, 201 insertions, 105 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4064a983099c..f06afdbcea9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -205,7 +205,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
   }
-  for (auto T : MVT::integer_vector_valuetypes())
+  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
 
   // Dynamic stack allocation: use the default expansion.
@@ -228,7 +228,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   // - Floating-point extending loads.
   // - Floating-point truncating stores.
   // - i1 extending loads.
-  // - extending/truncating SIMD loads/stores
+  // - truncating SIMD stores and most extending loads
   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   for (auto T : MVT::integer_valuetypes())
@@ -237,7 +237,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   if (Subtarget->hasSIMD128()) {
     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                    MVT::v2f64}) {
-      for (auto MemT : MVT::vector_valuetypes()) {
+      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
         if (MVT(T) != MemT) {
           setTruncStoreAction(T, MemT, Expand);
           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
@@ -245,6 +245,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
         }
       }
     }
+    // But some vector extending loads are legal
+    if (Subtarget->hasUnimplementedSIMD128()) {
+      for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+        setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
+        setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
+        setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
+      }
+    }
   }
 
   // Don't do anything clever with build_pairs
@@ -259,16 +267,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
 
   setMaxAtomicSizeInBitsSupported(64);
 
-  if (Subtarget->hasBulkMemory()) {
-    // Use memory.copy and friends over multiple loads and stores
-    MaxStoresPerMemcpy = 1;
-    MaxStoresPerMemcpyOptSize = 1;
-    MaxStoresPerMemmove = 1;
-    MaxStoresPerMemmoveOptSize = 1;
-    MaxStoresPerMemset = 1;
-    MaxStoresPerMemsetOptSize = 1;
-  }
-
   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
   // consistent with the f64 and f128 names.
   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
@@ -337,8 +335,8 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                        bool Float64, unsigned LoweredOpcode) {
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
-  unsigned OutReg = MI.getOperand(0).getReg();
-  unsigned InReg = MI.getOperand(1).getReg();
+  Register OutReg = MI.getOperand(0).getReg();
+  Register InReg = MI.getOperand(1).getReg();
 
   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
@@ -396,9 +394,9 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
   // For unsigned numbers, we have to do a separate comparison with zero.
   if (IsUnsigned) {
     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
-    unsigned SecondCmpReg =
+    Register SecondCmpReg =
         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
-    unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
     BuildMI(BB, DL, TII.get(FConst), Tmp1)
         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
@@ -550,6 +548,16 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
   return true;
 }
 
+bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+  if (!Subtarget->hasUnimplementedSIMD128())
+    return false;
+  MVT ExtT = ExtVal.getSimpleValueType();
+  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
+  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
+         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
+         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
+}
+
 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                   LLVMContext &C,
                                                   EVT VT) const {
@@ -569,7 +577,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i32;
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = 4;
+    Info.align = Align(4);
     // atomic.notify instruction does not really load the memory specified with
     // this argument, but MachineMemOperand should either be load or store, so
     // we set this to a load.
@@ -583,7 +591,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i32;
     Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
-    Info.align = 4;
+    Info.align = Align(4);
     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
     return true;
   case Intrinsic::wasm_atomic_wait_i64:
@@ -591,7 +599,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i64;
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = 8;
+    Info.align = Align(8);
     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
     return true;
   default:
@@ -623,7 +631,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) {
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::PreserveMost ||
          CallConv == CallingConv::PreserveAll ||
-         CallConv == CallingConv::CXX_FAST_TLS;
+         CallConv == CallingConv::CXX_FAST_TLS ||
+         CallConv == CallingConv::WASM_EmscriptenInvoke;
 }
 
 SDValue
@@ -644,13 +653,36 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (CLI.IsPatchPoint)
     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
 
-  // Fail if tail calls are required but not enabled
-  if (!Subtarget->hasTailCall()) {
-    if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
-         MF.getTarget().Options.GuaranteedTailCallOpt) ||
-        (CLI.CS && CLI.CS.isMustTailCall()))
-      fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
-    CLI.IsTailCall = false;
+  if (CLI.IsTailCall) {
+    bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
+    if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
+      // Do not tail call unless caller and callee return types match
+      const Function &F = MF.getFunction();
+      const TargetMachine &TM = getTargetMachine();
+      Type *RetTy = F.getReturnType();
+      SmallVector<MVT, 4> CallerRetTys;
+      SmallVector<MVT, 4> CalleeRetTys;
+      computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
+      computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
+      bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
+                        std::equal(CallerRetTys.begin(), CallerRetTys.end(),
+                                   CalleeRetTys.begin());
+      if (!TypesMatch) {
+        // musttail in this case would be an LLVM IR validation failure
+        assert(!MustTail);
+        CLI.IsTailCall = false;
+      }
+    } else {
+      CLI.IsTailCall = false;
+      if (MustTail) {
+        if (CLI.IsVarArg) {
+          // The return would pop the argument buffer
+          fail(DL, DAG, "WebAssembly does not support varargs tail calls");
+        } else {
+          fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+        }
+      }
+    }
   }
 
   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
@@ -659,6 +691,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+  // The generic code may have added an sret argument. If we're lowering an
+  // invoke function, the ABI requires that the function pointer be the first
+  // argument, so we may have to swap the arguments.
+  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
+      Outs[0].Flags.isSRet()) {
+    std::swap(Outs[0], Outs[1]);
+    std::swap(OutVals[0], OutVals[1]);
+  }
+
   unsigned NumFixedArgs = 0;
   for (unsigned I = 0; I < Outs.size(); ++I) {
     const ISD::OutputArg &Out = Outs[I];
@@ -810,8 +852,8 @@ bool WebAssemblyTargetLowering::CanLowerReturn(
     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     LLVMContext & /*Context*/) const {
-  // WebAssembly can't currently handle returning tuples.
-  return Outs.size() <= 1;
+  // WebAssembly can only handle returning tuples with multivalue enabled
+  return Subtarget->hasMultivalue() || Outs.size() <= 1;
 }
 
 SDValue WebAssemblyTargetLowering::LowerReturn(
@@ -819,7 +861,8 @@ SDValue WebAssemblyTargetLowering::LowerReturn(
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
     SelectionDAG &DAG) const {
-  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
+         "MVP WebAssembly can only return up to one value");
   if (!callingConvSupported(CallConv))
     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
 
@@ -881,7 +924,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   // the buffer is passed as an argument.
   if (IsVarArg) {
     MVT PtrVT = getPointerTy(MF.getDataLayout());
-    unsigned VarargVreg =
+    Register VarargVreg =
         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
     MFI->setVarargBufferVreg(VarargVreg);
     Chain = DAG.getCopyToReg(
@@ -1022,8 +1065,9 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
     return SDValue();
 
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  MakeLibCallOptions CallOptions;
   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
-                     {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL)
+                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
       .first;
 }
 
@@ -1037,7 +1081,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
 
   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
   EVT VT = Op.getValueType();
-  unsigned FP =
+  Register FP =
       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
 }
@@ -1249,68 +1293,116 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   const EVT VecT = Op.getValueType();
   const EVT LaneT = Op.getOperand(0).getValueType();
   const size_t Lanes = Op.getNumOperands();
+  bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;
+
+  // BUILD_VECTORs are lowered to the instruction that initializes the highest
+  // possible number of lanes at once followed by a sequence of replace_lane
+  // instructions to individually initialize any remaining lanes.
+
+  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
+  // swizzled lanes should be given greater weight.
+
+  // TODO: Investigate building vectors by shuffling together vectors built by
+  // separately specialized means.
+
   auto IsConstant = [](const SDValue &V) {
     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
   };
 
-  // Find the most common operand, which is approximately the best to splat
-  using Entry = std::pair<SDValue, size_t>;
-  SmallVector<Entry, 16> ValueCounts;
-  size_t NumConst = 0, NumDynamic = 0;
-  for (const SDValue &Lane : Op->op_values()) {
-    if (Lane.isUndef()) {
-      continue;
-    } else if (IsConstant(Lane)) {
-      NumConst++;
-    } else {
-      NumDynamic++;
-    }
-    auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
-                                [&Lane](Entry A) { return A.first == Lane; });
-    if (CountIt == ValueCounts.end()) {
-      ValueCounts.emplace_back(Lane, 1);
+  // Returns the source vector and index vector pair if they exist. Checks for:
+  //   (extract_vector_elt
+  //     $src,
+  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
+  //   )
+  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
+    auto Bail = std::make_pair(SDValue(), SDValue());
+    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return Bail;
+    const SDValue &SwizzleSrc = Lane->getOperand(0);
+    const SDValue &IndexExt = Lane->getOperand(1);
+    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
+      return Bail;
+    const SDValue &Index = IndexExt->getOperand(0);
+    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return Bail;
+    const SDValue &SwizzleIndices = Index->getOperand(0);
+    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
+        SwizzleIndices.getValueType() != MVT::v16i8 ||
+        Index->getOperand(1)->getOpcode() != ISD::Constant ||
+        Index->getConstantOperandVal(1) != I)
+      return Bail;
+    return std::make_pair(SwizzleSrc, SwizzleIndices);
+  };
+
+  using ValueEntry = std::pair<SDValue, size_t>;
+  SmallVector<ValueEntry, 16> SplatValueCounts;
+
+  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
+  SmallVector<SwizzleEntry, 16> SwizzleCounts;
+
+  auto AddCount = [](auto &Counts, const auto &Val) {
+    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
+                                [&Val](auto E) { return E.first == Val; });
+    if (CountIt == Counts.end()) {
+      Counts.emplace_back(Val, 1);
     } else {
       CountIt->second++;
     }
+  };
+
+  auto GetMostCommon = [](auto &Counts) {
+    auto CommonIt =
+        std::max_element(Counts.begin(), Counts.end(),
+                         [](auto A, auto B) { return A.second < B.second; });
+    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
+    return *CommonIt;
+  };
+
+  size_t NumConstantLanes = 0;
+
+  // Count eligible lanes for each type of vector creation op
+  for (size_t I = 0; I < Lanes; ++I) {
+    const SDValue &Lane = Op->getOperand(I);
+    if (Lane.isUndef())
+      continue;
+
+    AddCount(SplatValueCounts, Lane);
+
+    if (IsConstant(Lane)) {
+      NumConstantLanes++;
+    } else if (CanSwizzle) {
+      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
+      if (SwizzleSrcs.first)
+        AddCount(SwizzleCounts, SwizzleSrcs);
+    }
   }
-  auto CommonIt =
-      std::max_element(ValueCounts.begin(), ValueCounts.end(),
-                       [](Entry A, Entry B) { return A.second < B.second; });
-  assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
-  SDValue SplatValue = CommonIt->first;
-  size_t NumCommon = CommonIt->second;
-
-  // If v128.const is available, consider using it instead of a splat
+
+  SDValue SplatValue;
+  size_t NumSplatLanes;
+  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
+
+  SDValue SwizzleSrc;
+  SDValue SwizzleIndices;
+  size_t NumSwizzleLanes = 0;
+  if (SwizzleCounts.size())
+    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
+                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
+
+  // Predicate returning true if the lane is properly initialized by the
+  // original instruction
+  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
+  SDValue Result;
   if (Subtarget->hasUnimplementedSIMD128()) {
-    // {i32,i64,f32,f64}.const opcode, and value
-    const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
-    // SIMD prefix and opcode
-    const size_t SplatBytes = 2;
-    const size_t SplatConstBytes = SplatBytes + ConstBytes;
-    // SIMD prefix, opcode, and lane index
-    const size_t ReplaceBytes = 3;
-    const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
-    // SIMD prefix, v128.const opcode, and 128-bit value
-    const size_t VecConstBytes = 18;
-    // Initial v128.const and a replace_lane for each non-const operand
-    const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
-    // Initial splat and all necessary replace_lanes
-    const size_t SplatInitBytes =
-        IsConstant(SplatValue)
-            // Initial constant splat
-            ? (SplatConstBytes +
-               // Constant replace_lanes
-               (NumConst - NumCommon) * ReplaceConstBytes +
-               // Dynamic replace_lanes
-               (NumDynamic * ReplaceBytes))
-            // Initial dynamic splat
-            : (SplatBytes +
-               // Constant replace_lanes
-               (NumConst * ReplaceConstBytes) +
-               // Dynamic replace_lanes
-               (NumDynamic - NumCommon) * ReplaceBytes);
-    if (ConstInitBytes < SplatInitBytes) {
-      // Create build_vector that will lower to initial v128.const
+    // Prefer swizzles over vector consts over splats
+    if (NumSwizzleLanes >= NumSplatLanes &&
+        NumSwizzleLanes >= NumConstantLanes) {
+      Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
+                           SwizzleIndices);
+      auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
+      IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
+        return Swizzled == GetSwizzleSrcs(I, Lane);
+      };
+    } else if (NumConstantLanes >= NumSplatLanes) {
       SmallVector<SDValue, 16> ConstLanes;
       for (const SDValue &Lane : Op->op_values()) {
         if (IsConstant(Lane)) {
@@ -1321,26 +1413,35 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
           ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
         }
       }
-      SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
-      // Add replace_lane instructions for non-const lanes
-      for (size_t I = 0; I < Lanes; ++I) {
-        const SDValue &Lane = Op->getOperand(I);
-        if (!Lane.isUndef() && !IsConstant(Lane))
-          Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
-                               DAG.getConstant(I, DL, MVT::i32));
-      }
-      return Result;
+      Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+      IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+        return IsConstant(Lane);
+      };
+    }
+  }
+  if (!Result) {
+    // Use a splat, but possibly a load_splat
+    LoadSDNode *SplattedLoad;
+    if (Subtarget->hasUnimplementedSIMD128() &&
+        (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
+        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
+      Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
+    } else {
+      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
     }
+    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+      return Lane == SplatValue;
+    };
   }
-  // Use a splat for the initial vector
-  SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
-  // Add replace_lane instructions for other values
+
+  // Add replace_lane instructions for any unhandled values
   for (size_t I = 0; I < Lanes; ++I) {
     const SDValue &Lane = Op->getOperand(I);
-    if (Lane != SplatValue)
+    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                            DAG.getConstant(I, DL, MVT::i32));
   }
+
   return Result;
 }
 
@@ -1415,11 +1516,6 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
   // Only manually lower vector shifts
   assert(Op.getSimpleValueType().isVector());
 
-  // Expand all vector shifts until V8 fixes its implementation
-  // TODO: remove this once V8 is fixed
-  if (!Subtarget->hasUnimplementedSIMD128())
-    return unrollVectorShift(Op, DAG);
-
   // Unroll non-splat vector shifts
   BuildVectorSDNode *ShiftVec;
   SDValue SplatVal;
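The isVectorLoadExtDesirable() definition added in the @@ -550 hunk also needs a matching override declaration in the class; that declaration belongs in WebAssemblyISelLowering.h, which is not part of this diff. A minimal sketch of what the companion declaration is assumed to look like:

    // Assumed companion declaration in WebAssemblyISelLowering.h (not shown in
    // this diff). The DAG combiner consults this hook before folding a
    // sign/zero-extended vector load into a single extending load, which the
    // @@ -245 hunk marks Legal for v8i16/v8i8, v4i32/v4i16, and v2i64/v2i32
    // when unimplemented-simd128 is enabled.
    bool isVectorLoadExtDesirable(SDValue ExtVal) const override;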