From 6694ed095d6b27a2c92ec4fd63664fcd88a05749 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 6 Jan 2017 20:13:35 +0000 Subject: Vendor import of clang trunk r291274: https://llvm.org/svn/llvm-project/cfe/trunk@291274 --- examples/clang-interpreter/main.cpp | 2 +- include/clang/AST/DeclCXX.h | 2 +- include/clang/ASTMatchers/Dynamic/VariantValue.h | 7 +- include/clang/Basic/Attr.td | 26 +- include/clang/Basic/BuiltinsPPC.def | 3 + include/clang/Basic/DiagnosticSemaKinds.td | 6 +- include/clang/CodeGen/BackendUtil.h | 4 +- include/clang/Driver/ToolChain.h | 7 + include/clang/Frontend/ASTUnit.h | 34 +- include/clang/Frontend/CompilerInstance.h | 18 +- include/clang/Frontend/CompilerInvocation.h | 12 +- include/clang/Frontend/FrontendOptions.h | 2 +- include/clang/Frontend/Utils.h | 6 +- include/clang/Lex/HeaderSearch.h | 4 +- include/clang/Lex/HeaderSearchOptions.h | 2 +- include/clang/Lex/Preprocessor.h | 11 +- include/clang/Lex/PreprocessorOptions.h | 6 +- include/clang/Sema/CodeCompleteConsumer.h | 17 +- include/clang/Sema/Ownership.h | 8 +- include/clang/Sema/Sema.h | 15 +- include/clang/Serialization/ASTReader.h | 62 ++-- include/clang/Serialization/ASTWriter.h | 99 +++--- include/clang/Serialization/ModuleFileExtension.h | 2 +- .../StaticAnalyzer/Core/BugReporter/BugReporter.h | 7 +- .../Core/BugReporter/BugReporterVisitor.h | 136 ++++---- .../Core/BugReporter/PathDiagnostic.h | 29 +- include/clang/StaticAnalyzer/Core/CheckerManager.h | 8 +- include/clang/Tooling/Tooling.h | 8 +- lib/ARCMigrate/ARCMT.cpp | 4 +- lib/AST/ASTContext.cpp | 4 +- lib/ASTMatchers/Dynamic/VariantValue.cpp | 8 +- lib/Basic/Targets.cpp | 181 +++++++++-- lib/CodeGen/BackendUtil.cpp | 30 +- lib/CodeGen/CGBuiltin.cpp | 84 +++++ lib/CodeGen/CGCall.cpp | 6 +- lib/CodeGen/CGExpr.cpp | 11 +- lib/CodeGen/CGOpenMPRuntime.cpp | 23 +- lib/CodeGen/CGOpenMPRuntime.h | 36 ++- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 270 ++++++++-------- lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 51 +-- lib/CodeGen/CodeGenAction.cpp | 16 +- lib/CodeGen/CodeGenFunction.h | 2 +- lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 9 +- lib/CodeGen/TargetInfo.cpp | 206 ++++++++++-- lib/Driver/Driver.cpp | 3 + lib/Driver/MSVCToolChain.cpp | 13 +- lib/Driver/MinGWToolChain.cpp | 17 +- lib/Driver/ToolChains.cpp | 37 ++- lib/Driver/ToolChains.h | 34 +- lib/Driver/Tools.cpp | 40 ++- lib/Driver/Tools.h | 13 + lib/Frontend/ASTUnit.cpp | 85 +++-- lib/Frontend/ChainedIncludesSource.cpp | 4 +- lib/Frontend/CompilerInstance.cpp | 38 ++- lib/Frontend/CompilerInvocation.cpp | 15 +- lib/Frontend/CreateInvocationFromCommandLine.cpp | 10 +- lib/Frontend/FrontendAction.cpp | 2 +- lib/Frontend/SerializedDiagnosticPrinter.cpp | 40 +-- lib/Frontend/TestModuleFileExtension.cpp | 4 +- lib/Headers/__clang_cuda_cmath.h | 10 +- lib/Headers/__clang_cuda_intrinsics.h | 42 +-- lib/Headers/altivec.h | 3 + lib/Headers/intrin.h | 90 ------ lib/Lex/HeaderSearch.cpp | 2 +- lib/Lex/Preprocessor.cpp | 2 +- lib/Parse/ParseDecl.cpp | 8 +- lib/Parse/ParseExpr.cpp | 2 + lib/Parse/ParsePragma.cpp | 6 +- lib/Sema/SemaCodeComplete.cpp | 10 +- lib/Sema/SemaDeclCXX.cpp | 75 ++--- lib/Sema/SemaExpr.cpp | 3 + lib/Sema/SemaExprCXX.cpp | 2 + lib/Sema/SemaOverload.cpp | 24 +- lib/Sema/SemaTemplateDeduction.cpp | 244 ++++++++------ lib/Sema/SemaTemplateInstantiateDecl.cpp | 42 ++- lib/Serialization/ASTReader.cpp | 52 +-- lib/Serialization/ASTWriter.cpp | 201 ++++++------ lib/Serialization/ASTWriterDecl.cpp | 58 ++-- lib/Serialization/GeneratePCH.cpp | 2 +- lib/Serialization/GlobalModuleIndex.cpp | 
4 +- lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp | 19 +- .../Checkers/DynamicTypePropagation.cpp | 19 +- .../Checkers/LocalizationChecker.cpp | 16 +- .../Checkers/MPI-Checker/MPIBugReporter.cpp | 10 +- .../Checkers/MPI-Checker/MPIBugReporter.h | 8 +- .../Checkers/MacOSKeychainAPIChecker.cpp | 21 +- lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 18 +- lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp | 19 +- .../Checkers/ObjCSuperDeallocChecker.cpp | 18 +- lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp | 21 +- .../Checkers/TestAfterDivZeroChecker.cpp | 17 +- lib/StaticAnalyzer/Checkers/ValistChecker.cpp | 12 +- lib/StaticAnalyzer/Core/BugReporter.cpp | 298 ++++++++--------- lib/StaticAnalyzer/Core/BugReporterVisitors.cpp | 159 ++++----- lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 9 +- lib/StaticAnalyzer/Core/PathDiagnostic.cpp | 65 ++-- lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 40 +-- .../Frontend/CheckerRegistration.cpp | 2 +- lib/StaticAnalyzer/Frontend/ModelInjector.cpp | 7 +- lib/Tooling/Tooling.cpp | 15 +- test/CodeGen/builtins-ppc-error.c | 20 ++ test/CodeGen/builtins-ppc-p9vector.c | 47 ++- test/CodeGen/catch-undef-behavior.c | 22 +- test/CodeGen/sanitize-recover.c | 4 +- test/CodeGen/vectorcall.c | 78 +++-- test/CodeGenCXX/dllexport.cpp | 12 + test/CodeGenCXX/homogeneous-aggregates.cpp | 6 +- test/CodeGenCXX/ubsan-vtable-checks.cpp | 2 +- .../CUDA/v8.0/bin/.keep | 0 .../CUDA/v8.0/include/.keep | 0 .../CUDA/v8.0/lib/.keep | 0 .../v8.0/nvvm/libdevice/libdevice.compute_30.10.bc | 0 .../v8.0/nvvm/libdevice/libdevice.compute_35.10.bc | 0 test/Driver/avr-toolchain.c | 4 + test/Driver/cuda-version-check.cu | 22 +- test/Driver/cuda-windows.cu | 14 + test/Index/complete-block-properties.m | 2 +- test/Index/complete-block-property-assignment.m | 24 +- test/OpenMP/nvptx_target_codegen.cpp | 354 +++++++++++---------- test/OpenMP/target_codegen.cpp | 4 +- test/OpenMP/target_codegen_registration.cpp | 52 +-- test/OpenMP/teams_distribute_collapse_messages.cpp | 3 +- test/Preprocessor/cuda-types.cu | 16 + test/Preprocessor/init.c | 171 ++++++++++ test/Sema/warn-cast-align.c | 8 + test/Sema/warn-strict-prototypes.m | 5 +- test/Sema/warn-thread-safety-analysis.c | 4 + test/SemaCUDA/attr-declspec.cu | 34 ++ test/SemaCUDA/cuda-inherits-calling-conv.cu | 30 ++ test/SemaCXX/constant-expression-cxx11.cpp | 4 +- test/SemaCXX/conversion-function.cpp | 2 +- .../cxx0x-initializer-stdinitializerlist.cpp | 26 +- test/SemaCXX/cxx1z-decomposition.cpp | 5 + test/SemaCXX/default-arg-closures.cpp | 9 +- test/SemaCXX/dllexport.cpp | 21 ++ test/SemaCXX/type-definition-in-specifier.cpp | 6 +- test/SemaObjC/block-omitted-return-type.m | 2 +- test/SemaOpenCL/extensions.cl | 13 + test/SemaTemplate/deduction.cpp | 35 ++ test/SemaTemplate/instantiate-local-class.cpp | 11 + tools/c-index-test/core_main.cpp | 5 +- tools/clang-import-test/clang-import-test.cpp | 2 +- tools/diagtool/ShowEnabledWarnings.cpp | 4 +- tools/libclang/CIndex.cpp | 9 +- tools/libclang/CIndexCodeCompletion.cpp | 19 +- tools/libclang/CXIndexDataConsumer.cpp | 4 +- tools/libclang/CXIndexDataConsumer.h | 2 +- tools/libclang/CXTranslationUnit.h | 3 +- tools/libclang/Indexing.cpp | 31 +- unittests/AST/ExternalASTSourceTest.cpp | 4 +- unittests/ASTMatchers/ASTMatchersTraversalTest.cpp | 9 +- unittests/Basic/SourceManagerTest.cpp | 24 +- unittests/Format/FormatTestJS.cpp | 4 +- unittests/Frontend/CodeGenActionTest.cpp | 4 +- unittests/Frontend/FrontendActionTest.cpp | 20 +- unittests/Lex/LexerTest.cpp | 8 +- 
unittests/Lex/PPCallbacksTest.cpp | 18 +- unittests/Lex/PPConditionalDirectiveRecordTest.cpp | 8 +- utils/TableGen/ClangAttrEmitter.cpp | 13 +- 159 files changed, 2879 insertions(+), 1898 deletions(-) create mode 100644 test/CodeGen/builtins-ppc-error.c create mode 100644 test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/.keep create mode 100644 test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/include/.keep create mode 100644 test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/lib/.keep create mode 100644 test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_30.10.bc create mode 100644 test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_35.10.bc create mode 100644 test/Driver/avr-toolchain.c create mode 100644 test/Driver/cuda-windows.cu create mode 100644 test/SemaCUDA/attr-declspec.cu create mode 100644 test/SemaCUDA/cuda-inherits-calling-conv.cu diff --git a/examples/clang-interpreter/main.cpp b/examples/clang-interpreter/main.cpp index 9b4a257bcba3..f7832291f2b6 100644 --- a/examples/clang-interpreter/main.cpp +++ b/examples/clang-interpreter/main.cpp @@ -145,7 +145,7 @@ int main(int argc, const char **argv, char * const *envp) { // Create a compiler instance to handle the actual work. CompilerInstance Clang; - Clang.setInvocation(CI.release()); + Clang.setInvocation(std::move(CI)); // Create the compilers actual diagnostics engine. Clang.createDiagnostics(); diff --git a/include/clang/AST/DeclCXX.h b/include/clang/AST/DeclCXX.h index 06ecd3c37342..0ca08db16299 100644 --- a/include/clang/AST/DeclCXX.h +++ b/include/clang/AST/DeclCXX.h @@ -3181,7 +3181,7 @@ public: /// Get the using declaration from which this was instantiated. This will /// always be an UnresolvedUsingValueDecl or an UnresolvedUsingTypenameDecl /// that is a pack expansion. - NamedDecl *getInstantiatedFromUsingDecl() { return InstantiatedFrom; } + NamedDecl *getInstantiatedFromUsingDecl() const { return InstantiatedFrom; } /// Get the set of using declarations that this pack expanded into. Note that /// some of these may still be unresolved. diff --git a/include/clang/ASTMatchers/Dynamic/VariantValue.h b/include/clang/ASTMatchers/Dynamic/VariantValue.h index 9f694d0ce434..2c80b5137320 100644 --- a/include/clang/ASTMatchers/Dynamic/VariantValue.h +++ b/include/clang/ASTMatchers/Dynamic/VariantValue.h @@ -119,7 +119,7 @@ class VariantMatcher { /// \brief Payload interface to be specialized by each matcher type. /// /// It follows a similar interface as VariantMatcher itself. 
- class Payload : public RefCountedBase { + class Payload { public: virtual ~Payload(); virtual llvm::Optional getSingleMatcher() const = 0; @@ -208,7 +208,8 @@ public: std::string getTypeAsString() const; private: - explicit VariantMatcher(Payload *Value) : Value(Value) {} + explicit VariantMatcher(std::shared_ptr Value) + : Value(std::move(Value)) {} template struct TypedMatcherOps; @@ -216,7 +217,7 @@ private: class PolymorphicPayload; class VariadicOpPayload; - IntrusiveRefCntPtr Value; + std::shared_ptr Value; }; template diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td index 107a3bdffa65..e3c2b0e45d3d 100644 --- a/include/clang/Basic/Attr.td +++ b/include/clang/Basic/Attr.td @@ -601,49 +601,53 @@ def Constructor : InheritableAttr { let Documentation = [Undocumented]; } +// CUDA attributes are spelled __attribute__((attr)) or __declspec(__attr__). + def CUDAConstant : InheritableAttr { - let Spellings = [GNU<"constant">]; + let Spellings = [GNU<"constant">, Declspec<"__constant__">]; let Subjects = SubjectList<[Var]>; let LangOpts = [CUDA]; let Documentation = [Undocumented]; } def CUDACudartBuiltin : IgnoredAttr { - let Spellings = [GNU<"cudart_builtin">]; + let Spellings = [GNU<"cudart_builtin">, Declspec<"__cudart_builtin__">]; let LangOpts = [CUDA]; } def CUDADevice : InheritableAttr { - let Spellings = [GNU<"device">]; + let Spellings = [GNU<"device">, Declspec<"__device__">]; let Subjects = SubjectList<[Function, Var]>; let LangOpts = [CUDA]; let Documentation = [Undocumented]; } def CUDADeviceBuiltin : IgnoredAttr { - let Spellings = [GNU<"device_builtin">]; + let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">]; let LangOpts = [CUDA]; } def CUDADeviceBuiltinSurfaceType : IgnoredAttr { - let Spellings = [GNU<"device_builtin_surface_type">]; + let Spellings = [GNU<"device_builtin_surface_type">, + Declspec<"__device_builtin_surface_type__">]; let LangOpts = [CUDA]; } def CUDADeviceBuiltinTextureType : IgnoredAttr { - let Spellings = [GNU<"device_builtin_texture_type">]; + let Spellings = [GNU<"device_builtin_texture_type">, + Declspec<"__device_builtin_texture_type__">]; let LangOpts = [CUDA]; } def CUDAGlobal : InheritableAttr { - let Spellings = [GNU<"global">]; + let Spellings = [GNU<"global">, Declspec<"__global__">]; let Subjects = SubjectList<[Function]>; let LangOpts = [CUDA]; let Documentation = [Undocumented]; } def CUDAHost : InheritableAttr { - let Spellings = [GNU<"host">]; + let Spellings = [GNU<"host">, Declspec<"__host__">]; let Subjects = SubjectList<[Function]>; let LangOpts = [CUDA]; let Documentation = [Undocumented]; @@ -657,7 +661,7 @@ def CUDAInvalidTarget : InheritableAttr { } def CUDALaunchBounds : InheritableAttr { - let Spellings = [GNU<"launch_bounds">]; + let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">]; let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>]; let LangOpts = [CUDA]; let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag, @@ -669,7 +673,7 @@ def CUDALaunchBounds : InheritableAttr { } def CUDAShared : InheritableAttr { - let Spellings = [GNU<"shared">]; + let Spellings = [GNU<"shared">, Declspec<"__shared__">]; let Subjects = SubjectList<[Var]>; let LangOpts = [CUDA]; let Documentation = [Undocumented]; @@ -1195,6 +1199,8 @@ def NoThrow : InheritableAttr { } def NvWeak : IgnoredAttr { + // No Declspec spelling of this attribute; the CUDA headers use + // __attribute__((nv_weak)) unconditionally. 
let Spellings = [GNU<"nv_weak">]; let LangOpts = [CUDA]; } diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def index 657ea4225aa8..f7cddc03131b 100644 --- a/include/clang/Basic/BuiltinsPPC.def +++ b/include/clang/Basic/BuiltinsPPC.def @@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "") BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "") +BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "") +BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 610fe0cb4c01..0807bba45fc4 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -3377,8 +3377,10 @@ def note_addrof_ovl_candidate_disabled_by_enable_if_attr : Note< "candidate function made ineligible by enable_if">; def note_ovl_candidate_deduced_mismatch : Note< "candidate template ignored: deduced type " - "%diff{$ of %ordinal0 parameter does not match adjusted type $ of argument" - "|of %ordinal0 parameter does not match adjusted type of argument}1,2%3">; + "%diff{$ of %select{|element of }4%ordinal0 parameter does not match " + "adjusted type $ of %select{|element of }4argument" + "|of %select{|element of }4%ordinal0 parameter does not match " + "adjusted type of %select{|element of }4argument}1,2%3">; def note_ovl_candidate_non_deduced_mismatch : Note< "candidate template ignored: could not match %diff{$ against $|types}0,1">; // This note is needed because the above note would sometimes print two diff --git a/include/clang/CodeGen/BackendUtil.h b/include/clang/CodeGen/BackendUtil.h index 01721d322098..c6abc6e3f574 100644 --- a/include/clang/CodeGen/BackendUtil.h +++ b/include/clang/CodeGen/BackendUtil.h @@ -21,6 +21,7 @@ namespace llvm { namespace clang { class DiagnosticsEngine; + class HeaderSearchOptions; class CodeGenOptions; class TargetOptions; class LangOptions; @@ -34,7 +35,8 @@ namespace clang { Backend_EmitObj ///< Emit native object files }; - void EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, + void EmitBackendOutput(DiagnosticsEngine &Diags, const HeaderSearchOptions &, + const CodeGenOptions &CGOpts, const TargetOptions &TOpts, const LangOptions &LOpts, const llvm::DataLayout &TDesc, llvm::Module *M, BackendAction Action, diff --git a/include/clang/Driver/ToolChain.h b/include/clang/Driver/ToolChain.h index cca239c4be2a..ffb0d60a6398 100644 --- a/include/clang/Driver/ToolChain.h +++ b/include/clang/Driver/ToolChain.h @@ -139,6 +139,13 @@ public: vfs::FileSystem &getVFS() const; const llvm::Triple &getTriple() const { return Triple; } + /// Get the toolchain's aux triple, if it has one. + /// + /// Exactly what the aux triple represents depends on the toolchain, but for + /// example when compiling CUDA code for the GPU, the triple might be NVPTX, + /// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu. 
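To make the aux-triple contract documented above concrete, here is a minimal, self-contained sketch (all names invented; real callers go through clang::driver::ToolChain): the base hook defaults to nullptr, and a CUDA-style device toolchain overrides it to report the host triple it is paired with.

    #include <cassert>
    #include <string>

    // Toy model of the getAuxTriple() hook this hunk adds. Only the
    // convention mirrors the patch: nullptr means "no aux triple", and a
    // device toolchain pairs its NVPTX triple with the host triple.
    struct ToolChainSketch {
      std::string Triple;
      virtual ~ToolChainSketch() = default;
      virtual const std::string *getAuxTriple() const { return nullptr; }
    };

    struct CudaDeviceToolChainSketch : ToolChainSketch {
      std::string HostTriple = "x86_64-unknown-linux-gnu";
      const std::string *getAuxTriple() const override { return &HostTriple; }
    };

    int main() {
      CudaDeviceToolChainSketch TC;
      TC.Triple = "nvptx64-nvidia-cuda";
      assert(TC.getAuxTriple() != nullptr);
      ToolChainSketch Host;
      assert(Host.getAuxTriple() == nullptr); // default: no aux triple
      return 0;
    }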
+ virtual const llvm::Triple *getAuxTriple() const { return nullptr; } + llvm::Triple::ArchType getArch() const { return Triple.getArch(); } StringRef getArchName() const { return Triple.getArchName(); } StringRef getPlatform() const { return Triple.getVendorName(); } diff --git a/include/clang/Frontend/ASTUnit.h b/include/clang/Frontend/ASTUnit.h index cc8d4e6e3e70..b1cdb46d505b 100644 --- a/include/clang/Frontend/ASTUnit.h +++ b/include/clang/Frontend/ASTUnit.h @@ -86,10 +86,10 @@ private: IntrusiveRefCntPtr SourceMgr; std::unique_ptr HeaderInfo; IntrusiveRefCntPtr Target; - IntrusiveRefCntPtr PP; + std::shared_ptr PP; IntrusiveRefCntPtr Ctx; std::shared_ptr TargetOpts; - IntrusiveRefCntPtr HSOpts; + std::shared_ptr HSOpts; IntrusiveRefCntPtr Reader; bool HadModuleLoaderFatalFailure; @@ -108,8 +108,8 @@ private: /// Optional owned invocation, just used to make the invocation used in /// LoadFromCommandLine available. - IntrusiveRefCntPtr Invocation; - + std::shared_ptr Invocation; + // OnlyLocalDecls - when true, walking this AST should only visit declarations // that come from the AST itself, not from included precompiled headers. // FIXME: This is temporary; eventually, CIndex will always do this. @@ -358,22 +358,21 @@ public: } /// \brief Retrieve the allocator used to cache global code completions. - IntrusiveRefCntPtr + std::shared_ptr getCachedCompletionAllocator() { return CachedCompletionAllocator; } CodeCompletionTUInfo &getCodeCompletionTUInfo() { if (!CCTUInfo) - CCTUInfo.reset(new CodeCompletionTUInfo( - new GlobalCodeCompletionAllocator)); + CCTUInfo = llvm::make_unique( + std::make_shared()); return *CCTUInfo; } private: /// \brief Allocator used to store cached code completions. - IntrusiveRefCntPtr - CachedCompletionAllocator; + std::shared_ptr CachedCompletionAllocator; std::unique_ptr CCTUInfo; @@ -496,12 +495,13 @@ public: const Preprocessor &getPreprocessor() const { return *PP; } Preprocessor &getPreprocessor() { return *PP; } + std::shared_ptr getPreprocessorPtr() const { return PP; } const ASTContext &getASTContext() const { return *Ctx; } ASTContext &getASTContext() { return *Ctx; } void setASTContext(ASTContext *ctx) { Ctx = ctx; } - void setPreprocessor(Preprocessor *pp); + void setPreprocessor(std::shared_ptr pp); bool hasSema() const { return (bool)TheSema; } Sema &getSema() const { @@ -701,11 +701,11 @@ public: /// remapped contents of that file. typedef std::pair RemappedFile; - /// \brief Create a ASTUnit. Gets ownership of the passed CompilerInvocation. - static ASTUnit *create(CompilerInvocation *CI, - IntrusiveRefCntPtr Diags, - bool CaptureDiagnostics, - bool UserFilesAreVolatile); + /// \brief Create a ASTUnit. Gets ownership of the passed CompilerInvocation. + static std::unique_ptr + create(std::shared_ptr CI, + IntrusiveRefCntPtr Diags, bool CaptureDiagnostics, + bool UserFilesAreVolatile); /// \brief Create a ASTUnit from an AST file. /// @@ -770,7 +770,7 @@ public: /// created ASTUnit was passed in \p Unit then the caller can check that. /// static ASTUnit *LoadFromCompilerInvocationAction( - CompilerInvocation *CI, + std::shared_ptr CI, std::shared_ptr PCHContainerOps, IntrusiveRefCntPtr Diags, FrontendAction *Action = nullptr, ASTUnit *Unit = nullptr, @@ -797,7 +797,7 @@ public: // FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we // shouldn't need to specify them at construction time. 
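The ASTUnit and CompilerInstance hunks in this stretch all chase one ownership change: CompilerInvocation stops being intrusively refcounted, factories hand back smart pointers, and call sites move them in (as in the main.cpp hunk at the top of the patch). A minimal sketch of the new calling convention, with a toy type standing in for CompilerInvocation:

    #include <memory>
    #include <utility>

    // Toy stand-ins; the real types are clang::CompilerInvocation and
    // clang::CompilerInstance. Only the ownership direction mirrors the
    // patch: setInvocation now takes std::shared_ptr by value, so callers
    // std::move() what used to be a raw release()'d pointer.
    struct InvocationSketch {};

    struct InstanceSketch {
      std::shared_ptr<InvocationSketch> Invocation;
      void setInvocation(std::shared_ptr<InvocationSketch> Value) {
        Invocation = std::move(Value);
      }
    };

    int main() {
      auto CI = std::make_shared<InvocationSketch>();
      InstanceSketch Clang;
      Clang.setInvocation(std::move(CI)); // was: setInvocation(CI.release())
      return Clang.Invocation ? 0 : 1;
    }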
static std::unique_ptr LoadFromCompilerInvocation( - CompilerInvocation *CI, + std::shared_ptr CI, std::shared_ptr PCHContainerOps, IntrusiveRefCntPtr Diags, FileManager *FileMgr, bool OnlyLocalDecls = false, bool CaptureDiagnostics = false, diff --git a/include/clang/Frontend/CompilerInstance.h b/include/clang/Frontend/CompilerInstance.h index 3f754d999874..3ebbc61515c6 100644 --- a/include/clang/Frontend/CompilerInstance.h +++ b/include/clang/Frontend/CompilerInstance.h @@ -70,7 +70,7 @@ class TargetInfo; /// and a long form that takes explicit instances of any required objects. class CompilerInstance : public ModuleLoader { /// The options used in this compiler instance. - IntrusiveRefCntPtr Invocation; + std::shared_ptr Invocation; /// The diagnostics engine instance. IntrusiveRefCntPtr Diagnostics; @@ -91,7 +91,7 @@ class CompilerInstance : public ModuleLoader { IntrusiveRefCntPtr SourceMgr; /// The preprocessor. - IntrusiveRefCntPtr PP; + std::shared_ptr PP; /// The AST context. IntrusiveRefCntPtr Context; @@ -228,7 +228,7 @@ public: } /// setInvocation - Replace the current invocation. - void setInvocation(CompilerInvocation *Value); + void setInvocation(std::shared_ptr Value); /// \brief Indicates whether we should (re)build the global module index. bool shouldBuildGlobalModuleIndex() const; @@ -288,6 +288,9 @@ public: const HeaderSearchOptions &getHeaderSearchOpts() const { return Invocation->getHeaderSearchOpts(); } + std::shared_ptr getHeaderSearchOptsPtr() const { + return Invocation->getHeaderSearchOptsPtr(); + } LangOptions &getLangOpts() { return *Invocation->getLangOpts(); @@ -433,13 +436,14 @@ public: return *PP; } + std::shared_ptr getPreprocessorPtr() { return PP; } + void resetAndLeakPreprocessor() { - BuryPointer(PP.get()); - PP.resetWithoutRelease(); + BuryPointer(new std::shared_ptr(PP)); } /// Replace the current preprocessor. - void setPreprocessor(Preprocessor *Value); + void setPreprocessor(std::shared_ptr Value); /// } /// @name ASTContext @@ -653,7 +657,7 @@ public: StringRef Path, StringRef Sysroot, bool DisablePCHValidation, bool AllowPCHWithCompilerErrors, Preprocessor &PP, ASTContext &Context, const PCHContainerReader &PCHContainerRdr, - ArrayRef> Extensions, + ArrayRef> Extensions, void *DeserializationListener, bool OwnDeserializationListener, bool Preamble, bool UseGlobalModuleIndex); diff --git a/include/clang/Frontend/CompilerInvocation.h b/include/clang/Frontend/CompilerInvocation.h index cb037c26546f..cef7f73ecaa0 100644 --- a/include/clang/Frontend/CompilerInvocation.h +++ b/include/clang/Frontend/CompilerInvocation.h @@ -51,7 +51,7 @@ bool ParseDiagnosticArgs(DiagnosticOptions &Opts, llvm::opt::ArgList &Args, bool DefaultDiagColor = true, bool DefaultShowOpt = true); -class CompilerInvocationBase : public RefCountedBase { +class CompilerInvocationBase { void operator=(const CompilerInvocationBase &) = delete; public: @@ -65,10 +65,10 @@ public: IntrusiveRefCntPtr DiagnosticOpts; /// Options controlling the \#include directive. - IntrusiveRefCntPtr HeaderSearchOpts; + std::shared_ptr HeaderSearchOpts; /// Options controlling the preprocessor (aside from \#include handling). 
- IntrusiveRefCntPtr PreprocessorOpts; + std::shared_ptr PreprocessorOpts; CompilerInvocationBase(); ~CompilerInvocationBase(); @@ -89,7 +89,13 @@ public: const HeaderSearchOptions &getHeaderSearchOpts() const { return *HeaderSearchOpts; } + std::shared_ptr getHeaderSearchOptsPtr() const { + return HeaderSearchOpts; + } + std::shared_ptr getPreprocessorOptsPtr() { + return PreprocessorOpts; + } PreprocessorOptions &getPreprocessorOpts() { return *PreprocessorOpts; } const PreprocessorOptions &getPreprocessorOpts() const { return *PreprocessorOpts; diff --git a/include/clang/Frontend/FrontendOptions.h b/include/clang/Frontend/FrontendOptions.h index aad397526a03..9c960bb0c305 100644 --- a/include/clang/Frontend/FrontendOptions.h +++ b/include/clang/Frontend/FrontendOptions.h @@ -243,7 +243,7 @@ public: std::vector Plugins; /// The list of module file extensions. - std::vector> ModuleFileExtensions; + std::vector> ModuleFileExtensions; /// \brief The list of module map files to load before processing the input. std::vector ModuleMapFiles; diff --git a/include/clang/Frontend/Utils.h b/include/clang/Frontend/Utils.h index 60419ff9b41d..0ee46846c804 100644 --- a/include/clang/Frontend/Utils.h +++ b/include/clang/Frontend/Utils.h @@ -184,10 +184,10 @@ createChainedIncludesSource(CompilerInstance &CI, /// /// \return A CompilerInvocation, or 0 if none was built for the given /// argument vector. -CompilerInvocation * +std::unique_ptr createInvocationFromCommandLine(ArrayRef Args, - IntrusiveRefCntPtr Diags = - IntrusiveRefCntPtr()); + IntrusiveRefCntPtr Diags = + IntrusiveRefCntPtr()); /// Return the value of the last argument as an integer, or a default. If Diags /// is non-null, emits an error if the argument is given, but non-integral. diff --git a/include/clang/Lex/HeaderSearch.h b/include/clang/Lex/HeaderSearch.h index b145d7bae15a..4df3e783117a 100644 --- a/include/clang/Lex/HeaderSearch.h +++ b/include/clang/Lex/HeaderSearch.h @@ -147,7 +147,7 @@ class HeaderSearch { }; /// \brief Header-search options used to initialize this header search. - IntrusiveRefCntPtr HSOpts; + std::shared_ptr HSOpts; DiagnosticsEngine &Diags; FileManager &FileMgr; @@ -248,7 +248,7 @@ class HeaderSearch { friend class DirectoryLookup; public: - HeaderSearch(IntrusiveRefCntPtr HSOpts, + HeaderSearch(std::shared_ptr HSOpts, SourceManager &SourceMgr, DiagnosticsEngine &Diags, const LangOptions &LangOpts, const TargetInfo *Target); ~HeaderSearch(); diff --git a/include/clang/Lex/HeaderSearchOptions.h b/include/clang/Lex/HeaderSearchOptions.h index 815b68c60e80..e99980537348 100644 --- a/include/clang/Lex/HeaderSearchOptions.h +++ b/include/clang/Lex/HeaderSearchOptions.h @@ -44,7 +44,7 @@ namespace frontend { /// HeaderSearchOptions - Helper class for storing options related to the /// initialization of the HeaderSearch object. -class HeaderSearchOptions : public RefCountedBase { +class HeaderSearchOptions { public: struct Entry { std::string Path; diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index bb71f49290b4..7ce1aad36d12 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -94,8 +94,8 @@ enum MacroUse { /// Lexers know only about tokens within a single source file, and don't /// know anything about preprocessor-level issues like the \#include stack, /// token expansion, etc. 
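The next hunk repeats a shape that recurs throughout these headers: a class stops deriving from llvm::RefCountedBase, and every holder is re-typed from IntrusiveRefCntPtr to std::shared_ptr in the same commit. The two differ in where the count lives (inside the object versus in a separate control block), which is why the holders cannot be migrated piecemeal. A small sketch of the after-state, with an invented stand-in type:

    #include <memory>

    // After this patch, classes like PreprocessorOptions carry no intrusive
    // refcount; ownership is expressed entirely at the holder, so the count
    // lives in shared_ptr's control block rather than inside the object.
    struct OptionsSketch { bool UsePredefines = true; }; // plain class now

    int main() {
      auto Opts = std::make_shared<OptionsSketch>();
      std::shared_ptr<OptionsSketch> Alias = Opts; // shared, non-intrusive
      return Alias.use_count() == 2 ? 0 : 1;
    }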
-class Preprocessor : public RefCountedBase { - IntrusiveRefCntPtr PPOpts; +class Preprocessor { + std::shared_ptr PPOpts; DiagnosticsEngine *Diags; LangOptions &LangOpts; const TargetInfo *Target; @@ -650,10 +650,9 @@ class Preprocessor : public RefCountedBase { void updateOutOfDateIdentifier(IdentifierInfo &II) const; public: - Preprocessor(IntrusiveRefCntPtr PPOpts, - DiagnosticsEngine &diags, LangOptions &opts, - SourceManager &SM, HeaderSearch &Headers, - ModuleLoader &TheModuleLoader, + Preprocessor(std::shared_ptr PPOpts, + DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM, + HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup = nullptr, bool OwnsHeaderSearch = false, TranslationUnitKind TUKind = TU_Complete); diff --git a/include/clang/Lex/PreprocessorOptions.h b/include/clang/Lex/PreprocessorOptions.h index de652cccb83a..58d79f7ff81a 100644 --- a/include/clang/Lex/PreprocessorOptions.h +++ b/include/clang/Lex/PreprocessorOptions.h @@ -40,7 +40,7 @@ enum ObjCXXARCStandardLibraryKind { /// PreprocessorOptions - This class is used for passing the various options /// used in preprocessor initialization to InitializePreprocessor(). -class PreprocessorOptions : public RefCountedBase { +class PreprocessorOptions { public: std::vector > Macros; std::vector Includes; @@ -117,7 +117,7 @@ public: ObjCXXARCStandardLibraryKind ObjCXXARCStandardLibrary; /// \brief Records the set of modules - class FailedModulesSet : public RefCountedBase { + class FailedModulesSet { llvm::StringSet<> Failed; public: @@ -136,7 +136,7 @@ public: /// to (re)build modules, so that once a module fails to build anywhere, /// other instances will see that the module has failed and won't try to /// build it again. - IntrusiveRefCntPtr FailedModules; + std::shared_ptr FailedModules; public: PreprocessorOptions() : UsePredefines(true), DetailedRecord(false), diff --git a/include/clang/Sema/CodeCompleteConsumer.h b/include/clang/Sema/CodeCompleteConsumer.h index b80924ea11fc..dee53dc14a8c 100644 --- a/include/clang/Sema/CodeCompleteConsumer.h +++ b/include/clang/Sema/CodeCompleteConsumer.h @@ -509,23 +509,18 @@ public: }; /// \brief Allocator for a cached set of global code completions. -class GlobalCodeCompletionAllocator - : public CodeCompletionAllocator, - public RefCountedBase -{ - -}; +class GlobalCodeCompletionAllocator : public CodeCompletionAllocator {}; class CodeCompletionTUInfo { llvm::DenseMap ParentNames; - IntrusiveRefCntPtr AllocatorRef; + std::shared_ptr AllocatorRef; public: explicit CodeCompletionTUInfo( - IntrusiveRefCntPtr Allocator) + std::shared_ptr Allocator) : AllocatorRef(std::move(Allocator)) {} - IntrusiveRefCntPtr getAllocatorRef() const { + std::shared_ptr getAllocatorRef() const { return AllocatorRef; } CodeCompletionAllocator &getAllocator() const { @@ -965,8 +960,8 @@ public: /// results to the given raw output stream. PrintingCodeCompleteConsumer(const CodeCompleteOptions &CodeCompleteOpts, raw_ostream &OS) - : CodeCompleteConsumer(CodeCompleteOpts, false), OS(OS), - CCTUInfo(new GlobalCodeCompletionAllocator) {} + : CodeCompleteConsumer(CodeCompleteOpts, false), OS(OS), + CCTUInfo(std::make_shared()) {} /// \brief Prints the finalized code-completion results. 
void ProcessCodeCompleteResults(Sema &S, CodeCompletionContext Context, diff --git a/include/clang/Sema/Ownership.h b/include/clang/Sema/Ownership.h index 92ea5296c45b..fd46de870fb4 100644 --- a/include/clang/Sema/Ownership.h +++ b/include/clang/Sema/Ownership.h @@ -153,8 +153,8 @@ namespace clang { ActionResult(const DiagnosticBuilder &) : Val(PtrTy()), Invalid(true) {} // These two overloads prevent void* -> bool conversions. - ActionResult(const void *); - ActionResult(volatile void *); + ActionResult(const void *) = delete; + ActionResult(volatile void *) = delete; bool isInvalid() const { return Invalid; } bool isUsable() const { return !Invalid && Val; } @@ -192,8 +192,8 @@ namespace clang { ActionResult(const DiagnosticBuilder &) : PtrWithInvalid(0x01) { } // These two overloads prevent void* -> bool conversions. - ActionResult(const void *); - ActionResult(volatile void *); + ActionResult(const void *) = delete; + ActionResult(volatile void *) = delete; bool isInvalid() const { return PtrWithInvalid & 0x01; } bool isUsable() const { return PtrWithInvalid > 0x01; } diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 3762253ef113..ca984a360a60 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -6564,6 +6564,10 @@ public: /// \brief After substituting deduced template arguments, a dependent /// parameter type did not match the corresponding argument. TDK_DeducedMismatch, + /// \brief After substituting deduced template arguments, an element of + /// a dependent parameter type did not match the corresponding element + /// of the corresponding argument (when deducing from an initializer list). + TDK_DeducedMismatchNested, /// \brief A non-depnedent component of the parameter did not match the /// corresponding component of the argument. TDK_NonDeducedMismatch, @@ -6602,13 +6606,14 @@ public: /// brief A function argument from which we performed template argument // deduction for a call. struct OriginalCallArg { - OriginalCallArg(QualType OriginalParamType, - unsigned ArgIdx, - QualType OriginalArgType) - : OriginalParamType(OriginalParamType), ArgIdx(ArgIdx), - OriginalArgType(OriginalArgType) { } + OriginalCallArg(QualType OriginalParamType, bool DecomposedParam, + unsigned ArgIdx, QualType OriginalArgType) + : OriginalParamType(OriginalParamType), + DecomposedParam(DecomposedParam), ArgIdx(ArgIdx), + OriginalArgType(OriginalArgType) {} QualType OriginalParamType; + bool DecomposedParam; unsigned ArgIdx; QualType OriginalArgType; }; diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h index 5230e2ae0013..93994e2c519c 100644 --- a/include/clang/Serialization/ASTReader.h +++ b/include/clang/Serialization/ASTReader.h @@ -384,8 +384,8 @@ private: std::unique_ptr Listener; /// \brief The receiver of deserialization events. - ASTDeserializationListener *DeserializationListener; - bool OwnsDeserializationListener; + ASTDeserializationListener *DeserializationListener = nullptr; + bool OwnsDeserializationListener = false; SourceManager &SourceMgr; FileManager &FileMgr; @@ -394,7 +394,7 @@ private: /// \brief The semantic analysis object that will be processing the /// AST files and the translation unit that uses it. - Sema *SemaObj; + Sema *SemaObj = nullptr; /// \brief The preprocessor that will be loading the source file. Preprocessor &PP; @@ -403,7 +403,7 @@ private: ASTContext &Context; /// \brief The AST consumer. 
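One small modernization buried in the Ownership.h hunk above deserves a note: the two never-defined ActionResult overloads become explicitly deleted, turning an accidental void*-to-bool conversion from a link-time error into a compile-time one. The idiom in isolation, on a toy type:

    // Sketch of the Ownership.h idiom: deleting the void* constructors
    // blocks the implicit const void* -> bool -> ActionResult route at
    // compile time (the old trick merely declared them privately and never
    // defined them, failing only at link time).
    struct ResultSketch {
      ResultSketch(bool Invalid) : Invalid(Invalid) {}
      ResultSketch(const void *) = delete;
      ResultSketch(volatile void *) = delete;
      bool Invalid;
    };

    int main() {
      ResultSketch OK(false);
      // const void *P = nullptr; ResultSketch Bad(P); // error: deleted
      return OK.Invalid ? 1 : 0;
    }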
- ASTConsumer *Consumer; + ASTConsumer *Consumer = nullptr; /// \brief The module manager which manages modules and their dependencies ModuleManager ModuleMgr; @@ -414,7 +414,7 @@ private: IdentifierResolver DummyIdResolver; /// A mapping from extension block names to module file extensions. - llvm::StringMap> ModuleFileExtensions; + llvm::StringMap> ModuleFileExtensions; /// \brief A timer used to track the time spent deserializing. std::unique_ptr ReadTimer; @@ -802,10 +802,10 @@ private: SourceLocation OptimizeOffPragmaLocation; /// \brief The PragmaMSStructKind pragma ms_struct state if set, or -1. - int PragmaMSStructState; + int PragmaMSStructState = -1; /// \brief The PragmaMSPointersToMembersKind pragma pointers_to_members state. - int PragmaMSPointersToMembersState; + int PragmaMSPointersToMembersState = -1; SourceLocation PointersToMembersPragmaLocation; /// \brief The OpenCL extension settings. @@ -870,10 +870,10 @@ private: bool UseGlobalIndex; /// \brief Whether we have tried loading the global module index yet. - bool TriedLoadingGlobalIndex; + bool TriedLoadingGlobalIndex = false; ///\brief Whether we are currently processing update records. - bool ProcessingUpdateRecords; + bool ProcessingUpdateRecords = false; typedef llvm::DenseMap SwitchCaseMapTy; /// \brief Mapping from switch-case IDs in the chain to switch-case statements @@ -886,73 +886,73 @@ private: /// \brief The number of source location entries de-serialized from /// the PCH file. - unsigned NumSLocEntriesRead; + unsigned NumSLocEntriesRead = 0; /// \brief The number of source location entries in the chain. - unsigned TotalNumSLocEntries; + unsigned TotalNumSLocEntries = 0; /// \brief The number of statements (and expressions) de-serialized /// from the chain. - unsigned NumStatementsRead; + unsigned NumStatementsRead = 0; /// \brief The total number of statements (and expressions) stored /// in the chain. - unsigned TotalNumStatements; + unsigned TotalNumStatements = 0; /// \brief The number of macros de-serialized from the chain. - unsigned NumMacrosRead; + unsigned NumMacrosRead = 0; /// \brief The total number of macros stored in the chain. - unsigned TotalNumMacros; + unsigned TotalNumMacros = 0; /// \brief The number of lookups into identifier tables. - unsigned NumIdentifierLookups; + unsigned NumIdentifierLookups = 0; /// \brief The number of lookups into identifier tables that succeed. - unsigned NumIdentifierLookupHits; + unsigned NumIdentifierLookupHits = 0; /// \brief The number of selectors that have been read. - unsigned NumSelectorsRead; + unsigned NumSelectorsRead = 0; /// \brief The number of method pool entries that have been read. - unsigned NumMethodPoolEntriesRead; + unsigned NumMethodPoolEntriesRead = 0; /// \brief The number of times we have looked up a selector in the method /// pool. - unsigned NumMethodPoolLookups; + unsigned NumMethodPoolLookups = 0; /// \brief The number of times we have looked up a selector in the method /// pool and found something. - unsigned NumMethodPoolHits; + unsigned NumMethodPoolHits = 0; /// \brief The number of times we have looked up a selector in the method /// pool within a specific module. - unsigned NumMethodPoolTableLookups; + unsigned NumMethodPoolTableLookups = 0; /// \brief The number of times we have looked up a selector in the method /// pool within a specific module and found something. - unsigned NumMethodPoolTableHits; + unsigned NumMethodPoolTableHits = 0; /// \brief The total number of method pool entries in the selector table. 
- unsigned TotalNumMethodPoolEntries; + unsigned TotalNumMethodPoolEntries = 0; /// Number of lexical decl contexts read/total. - unsigned NumLexicalDeclContextsRead, TotalLexicalDeclContexts; + unsigned NumLexicalDeclContextsRead = 0, TotalLexicalDeclContexts = 0; /// Number of visible decl contexts read/total. - unsigned NumVisibleDeclContextsRead, TotalVisibleDeclContexts; + unsigned NumVisibleDeclContextsRead = 0, TotalVisibleDeclContexts = 0; /// Total size of modules, in bits, currently loaded - uint64_t TotalModulesSizeInBits; + uint64_t TotalModulesSizeInBits = 0; /// \brief Number of Decl/types that are currently deserializing. - unsigned NumCurrentElementsDeserializing; + unsigned NumCurrentElementsDeserializing = 0; /// \brief Set true while we are in the process of passing deserialized /// "interesting" decls to consumer inside FinishedDeserializing(). /// This is used as a guard to avoid recursively repeating the process of /// passing decls to consumer. - bool PassingDeclsToConsumer; + bool PassingDeclsToConsumer = false; /// \brief The set of identifiers that were read while the AST reader was /// (recursively) loading declarations. @@ -1055,7 +1055,7 @@ private: }; /// \brief What kind of records we are reading. - ReadingKind ReadingKind; + ReadingKind ReadingKind = Read_None; /// \brief RAII object to change the reading kind. class ReadingKindTracker { @@ -1366,7 +1366,7 @@ public: /// deserializing. ASTReader(Preprocessor &PP, ASTContext &Context, const PCHContainerReader &PCHContainerRdr, - ArrayRef> Extensions, + ArrayRef> Extensions, StringRef isysroot = "", bool DisableValidation = false, bool AllowASTWithCompilerErrors = false, bool AllowConfigurationMismatch = false, diff --git a/include/clang/Serialization/ASTWriter.h b/include/clang/Serialization/ASTWriter.h index 1469555ec21e..0d6b0268109d 100644 --- a/include/clang/Serialization/ASTWriter.h +++ b/include/clang/Serialization/ASTWriter.h @@ -107,16 +107,16 @@ private: llvm::BitstreamWriter &Stream; /// \brief The ASTContext we're writing. - ASTContext *Context; + ASTContext *Context = nullptr; /// \brief The preprocessor we're writing. - Preprocessor *PP; + Preprocessor *PP = nullptr; /// \brief The reader of existing AST files, if we're chaining. - ASTReader *Chain; + ASTReader *Chain = nullptr; /// \brief The module we're currently writing, if any. - Module *WritingModule; + Module *WritingModule = nullptr; /// \brief The base directory for any relative paths we emit. std::string BaseDirectory; @@ -129,14 +129,14 @@ private: /// \brief Indicates when the AST writing is actively performing /// serialization, rather than just queueing updates. - bool WritingAST; + bool WritingAST = false; /// \brief Indicates that we are done serializing the collection of decls /// and types to emit. - bool DoneWritingDeclsAndTypes; + bool DoneWritingDeclsAndTypes = false; /// \brief Indicates that the AST contained compiler errors. - bool ASTHasCompilerErrors; + bool ASTHasCompilerErrors = false; /// \brief Mapping from input file entries to the index into the /// offset table where information about that input file is stored. @@ -170,10 +170,10 @@ private: std::queue DeclTypesToEmit; /// \brief The first ID number we can use for our own declarations. - serialization::DeclID FirstDeclID; + serialization::DeclID FirstDeclID = serialization::NUM_PREDEF_DECL_IDS; /// \brief The decl ID that will be assigned to the next new decl. 
- serialization::DeclID NextDeclID; + serialization::DeclID NextDeclID = FirstDeclID; /// \brief Map that provides the ID numbers of each declaration within /// the output stream, as well as those deserialized from a chained PCH. @@ -205,10 +205,10 @@ private: void associateDeclWithFile(const Decl *D, serialization::DeclID); /// \brief The first ID number we can use for our own types. - serialization::TypeID FirstTypeID; + serialization::TypeID FirstTypeID = serialization::NUM_PREDEF_TYPE_IDS; /// \brief The type ID that will be assigned to the next new type. - serialization::TypeID NextTypeID; + serialization::TypeID NextTypeID = FirstTypeID; /// \brief Map that provides the ID numbers of each type within the /// output stream, plus those deserialized from a chained PCH. @@ -226,10 +226,10 @@ private: std::vector TypeOffsets; /// \brief The first ID number we can use for our own identifiers. - serialization::IdentID FirstIdentID; + serialization::IdentID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS; /// \brief The identifier ID that will be assigned to the next new identifier. - serialization::IdentID NextIdentID; + serialization::IdentID NextIdentID = FirstIdentID; /// \brief Map that provides the ID numbers of each identifier in /// the output stream. @@ -240,10 +240,10 @@ private: llvm::MapVector IdentifierIDs; /// \brief The first ID number we can use for our own macros. - serialization::MacroID FirstMacroID; + serialization::MacroID FirstMacroID = serialization::NUM_PREDEF_MACRO_IDS; /// \brief The identifier ID that will be assigned to the next new identifier. - serialization::MacroID NextMacroID; + serialization::MacroID NextMacroID = FirstMacroID; /// \brief Map that provides the ID numbers of each macro. llvm::DenseMap MacroIDs; @@ -275,16 +275,18 @@ private: std::vector IdentifierOffsets; /// \brief The first ID number we can use for our own submodules. - serialization::SubmoduleID FirstSubmoduleID; - + serialization::SubmoduleID FirstSubmoduleID = + serialization::NUM_PREDEF_SUBMODULE_IDS; + /// \brief The submodule ID that will be assigned to the next new submodule. - serialization::SubmoduleID NextSubmoduleID; + serialization::SubmoduleID NextSubmoduleID = FirstSubmoduleID; /// \brief The first ID number we can use for our own selectors. - serialization::SelectorID FirstSelectorID; + serialization::SelectorID FirstSelectorID = + serialization::NUM_PREDEF_SELECTOR_IDS; /// \brief The selector ID that will be assigned to the next new selector. - serialization::SelectorID NextSelectorID; + serialization::SelectorID NextSelectorID = FirstSelectorID; /// \brief Map that provides the ID numbers of each Selector. llvm::MapVector SelectorIDs; @@ -394,18 +396,18 @@ private: llvm::DenseMap SwitchCaseIDs; /// \brief The number of statements written to the AST file. - unsigned NumStatements; + unsigned NumStatements = 0; /// \brief The number of macros written to the AST file. - unsigned NumMacros; + unsigned NumMacros = 0; /// \brief The number of lexical declcontexts written to the AST /// file. - unsigned NumLexicalDeclContexts; + unsigned NumLexicalDeclContexts = 0; /// \brief The number of visible declcontexts written to the AST /// file. - unsigned NumVisibleDeclContexts; + unsigned NumVisibleDeclContexts = 0; /// \brief A mapping from each known submodule to its ID number, which will /// be a positive integer. 
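The long runs of = 0 / = nullptr / = false edits running through ASTReader.h and ASTWriter.h are one idiom applied member by member: C++11 in-class default member initializers replacing constructor-initializer-list boilerplate, so a newly added counter can never be forgotten in one of several constructors. The pattern in miniature, with an invented type:

    // Miniature of the ASTReader/ASTWriter cleanup: defaults move onto the
    // declarations, so every constructor (including ones added later)
    // starts from the same zeroed state without repeating an init list.
    struct StatsSketch {
      unsigned NumStatementsRead = 0;
      unsigned TotalNumStatements = 0;
      bool PassingDeclsToConsumer = false;
      StatsSketch() = default; // nothing to list; the NSDMIs apply
    };

    int main() {
      StatsSketch S;
      return (S.NumStatementsRead == 0 && !S.PassingDeclsToConsumer) ? 0 : 1;
    }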
@@ -436,8 +438,8 @@ private: void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, bool isModule); - unsigned TypeExtQualAbbrev; - unsigned TypeFunctionProtoAbbrev; + unsigned TypeExtQualAbbrev = 0; + unsigned TypeFunctionProtoAbbrev = 0; void WriteTypeAbbrevs(); void WriteType(QualType T); @@ -470,22 +472,22 @@ private: void WriteModuleFileExtension(Sema &SemaRef, ModuleFileExtensionWriter &Writer); - unsigned DeclParmVarAbbrev; - unsigned DeclContextLexicalAbbrev; - unsigned DeclContextVisibleLookupAbbrev; - unsigned UpdateVisibleAbbrev; - unsigned DeclRecordAbbrev; - unsigned DeclTypedefAbbrev; - unsigned DeclVarAbbrev; - unsigned DeclFieldAbbrev; - unsigned DeclEnumAbbrev; - unsigned DeclObjCIvarAbbrev; - unsigned DeclCXXMethodAbbrev; - - unsigned DeclRefExprAbbrev; - unsigned CharacterLiteralAbbrev; - unsigned IntegerLiteralAbbrev; - unsigned ExprImplicitCastAbbrev; + unsigned DeclParmVarAbbrev = 0; + unsigned DeclContextLexicalAbbrev = 0; + unsigned DeclContextVisibleLookupAbbrev = 0; + unsigned UpdateVisibleAbbrev = 0; + unsigned DeclRecordAbbrev = 0; + unsigned DeclTypedefAbbrev = 0; + unsigned DeclVarAbbrev = 0; + unsigned DeclFieldAbbrev = 0; + unsigned DeclEnumAbbrev = 0; + unsigned DeclObjCIvarAbbrev = 0; + unsigned DeclCXXMethodAbbrev = 0; + + unsigned DeclRefExprAbbrev = 0; + unsigned CharacterLiteralAbbrev = 0; + unsigned IntegerLiteralAbbrev = 0; + unsigned ExprImplicitCastAbbrev = 0; void WriteDeclAbbrevs(); void WriteDecl(ASTContext &Context, Decl *D); @@ -498,7 +500,7 @@ public: /// \brief Create a new precompiled header writer that outputs to /// the given bitstream. ASTWriter(llvm::BitstreamWriter &Stream, - ArrayRef> Extensions, + ArrayRef> Extensions, bool IncludeTimestamps = true); ~ASTWriter() override; @@ -934,13 +936,10 @@ protected: SmallVectorImpl &getPCH() const { return Buffer->Data; } public: - PCHGenerator( - const Preprocessor &PP, StringRef OutputFile, - StringRef isysroot, - std::shared_ptr Buffer, - ArrayRef> Extensions, - bool AllowASTWithErrors = false, - bool IncludeTimestamps = true); + PCHGenerator(const Preprocessor &PP, StringRef OutputFile, StringRef isysroot, + std::shared_ptr Buffer, + ArrayRef> Extensions, + bool AllowASTWithErrors = false, bool IncludeTimestamps = true); ~PCHGenerator() override; void InitializeSema(Sema &S) override { SemaPtr = &S; } void HandleTranslationUnit(ASTContext &Ctx) override; diff --git a/include/clang/Serialization/ModuleFileExtension.h b/include/clang/Serialization/ModuleFileExtension.h index ba2e2fd0d9f1..f7bdcec598f1 100644 --- a/include/clang/Serialization/ModuleFileExtension.h +++ b/include/clang/Serialization/ModuleFileExtension.h @@ -60,7 +60,7 @@ class ModuleFileExtensionWriter; /// compiled module files (.pcm) and precompiled headers (.pch) via a /// custom writer that can then be accessed via a custom reader when /// the module file or precompiled header is loaded. 
-class ModuleFileExtension : public llvm::RefCountedBase { +class ModuleFileExtension { public: virtual ~ModuleFileExtension(); diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h index 73f4dd5a3e91..0f1eb096c495 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -66,8 +66,7 @@ public: typedef SmallVector, 8> VisitorList; typedef VisitorList::iterator visitor_iterator; typedef SmallVector ExtraTextList; - typedef SmallVector, 4> - NoteList; + typedef SmallVector, 4> NoteList; protected: friend class BugReporter; @@ -268,12 +267,12 @@ public: /// the extra note should appear. void addNote(StringRef Msg, const PathDiagnosticLocation &Pos, ArrayRef Ranges) { - PathDiagnosticNotePiece *P = new PathDiagnosticNotePiece(Pos, Msg); + auto P = std::make_shared(Pos, Msg); for (const auto &R : Ranges) P->addRange(R); - Notes.push_back(P); + Notes.push_back(std::move(P)); } // FIXME: Instead of making an override, we could have default-initialized diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h index 8c3a1d0d4b40..b72bce5fc9f8 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h @@ -59,10 +59,9 @@ public: /// /// The last parameter can be used to register a new visitor with the given /// BugReport while processing a node. - virtual PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) = 0; + virtual std::shared_ptr + VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred, + BugReporterContext &BRC, BugReport &BR) = 0; /// \brief Provide custom definition for the final diagnostic piece on the /// path - the piece, which is displayed before the path is expanded. @@ -121,10 +120,10 @@ public: void Profile(llvm::FoldingSetNodeID &ID) const override; - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; }; class TrackConstraintBRVisitor final @@ -150,10 +149,10 @@ public: /// to make all PathDiagnosticPieces created by this visitor. static const char *getTag(); - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; private: /// Checks if the constraint is valid in the current state. @@ -172,10 +171,10 @@ public: ID.AddPointer(&x); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; /// If the statement is a message send expression with nil receiver, returns /// the receiver expression. Returns NULL otherwise. @@ -200,49 +199,38 @@ public: /// to make all PathDiagnosticPieces created by this visitor. 
static const char *getTag(); - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *Prev, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *Prev, + BugReporterContext &BRC, + BugReport &BR) override; - PathDiagnosticPiece *VisitNodeImpl(const ExplodedNode *N, - const ExplodedNode *Prev, - BugReporterContext &BRC, - BugReport &BR); - - PathDiagnosticPiece *VisitTerminator(const Stmt *Term, - const ExplodedNode *N, - const CFGBlock *srcBlk, - const CFGBlock *dstBlk, - BugReport &R, - BugReporterContext &BRC); - - PathDiagnosticPiece *VisitTrueTest(const Expr *Cond, - bool tookTrue, - BugReporterContext &BRC, - BugReport &R, - const ExplodedNode *N); - - PathDiagnosticPiece *VisitTrueTest(const Expr *Cond, - const DeclRefExpr *DR, - const bool tookTrue, - BugReporterContext &BRC, - BugReport &R, - const ExplodedNode *N); - - PathDiagnosticPiece *VisitTrueTest(const Expr *Cond, - const BinaryOperator *BExpr, - const bool tookTrue, - BugReporterContext &BRC, - BugReport &R, - const ExplodedNode *N); - - PathDiagnosticPiece *VisitConditionVariable(StringRef LhsString, - const Expr *CondVarExpr, - const bool tookTrue, - BugReporterContext &BRC, - BugReport &R, - const ExplodedNode *N); + std::shared_ptr VisitNodeImpl(const ExplodedNode *N, + const ExplodedNode *Prev, + BugReporterContext &BRC, + BugReport &BR); + + std::shared_ptr + VisitTerminator(const Stmt *Term, const ExplodedNode *N, + const CFGBlock *srcBlk, const CFGBlock *dstBlk, BugReport &R, + BugReporterContext &BRC); + + std::shared_ptr + VisitTrueTest(const Expr *Cond, bool tookTrue, BugReporterContext &BRC, + BugReport &R, const ExplodedNode *N); + + std::shared_ptr + VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, const bool tookTrue, + BugReporterContext &BRC, BugReport &R, const ExplodedNode *N); + + std::shared_ptr + VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr, + const bool tookTrue, BugReporterContext &BRC, BugReport &R, + const ExplodedNode *N); + + std::shared_ptr + VisitConditionVariable(StringRef LhsString, const Expr *CondVarExpr, + const bool tookTrue, BugReporterContext &BRC, + BugReport &R, const ExplodedNode *N); bool patternMatch(const Expr *Ex, const Expr *ParentEx, @@ -270,10 +258,10 @@ public: ID.AddPointer(getTag()); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *Prev, - BugReporterContext &BRC, - BugReport &BR) override { + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *Prev, + BugReporterContext &BRC, + BugReport &BR) override { return nullptr; } @@ -302,10 +290,10 @@ public: ID.AddPointer(R); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; }; class SuppressInlineDefensiveChecksVisitor final @@ -333,10 +321,10 @@ public: /// to make all PathDiagnosticPieces created by this visitor. 
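The many VisitNode re-declarations above are one signature change fanned out across every bug-reporter visitor: the raw, implicitly owning PathDiagnosticPiece* result becomes std::shared_ptr, keeping the "return nullptr when there is nothing to say" convention. A toy of the new shape (names invented; only the return convention mirrors the patch):

    #include <memory>

    // Toy stand-ins for PathDiagnosticPiece and a visitor; shared ownership
    // of the produced piece, nullptr when the node yields no diagnostic.
    struct PieceSketch { const char *Msg; };

    struct VisitorSketch {
      std::shared_ptr<PieceSketch> VisitNode(bool Interesting) {
        if (!Interesting)
          return nullptr;
        return std::make_shared<PieceSketch>(PieceSketch{"note"});
      }
    };

    int main() {
      VisitorSketch V;
      bool OK = V.VisitNode(true) && !V.VisitNode(false);
      return OK ? 0 : 1;
    }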
static const char *getTag(); - PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, + BugReport &BR) override; }; class CXXSelfAssignmentBRVisitor final @@ -349,10 +337,10 @@ public: void Profile(llvm::FoldingSetNodeID &ID) const override {} - PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, + BugReport &BR) override; }; namespace bugreporter { diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h b/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h index efe809fb1981..dc6e54a33206 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h @@ -334,7 +334,7 @@ public: // Path "pieces" for path-sensitive diagnostics. //===----------------------------------------------------------------------===// -class PathDiagnosticPiece : public RefCountedBase { +class PathDiagnosticPiece { public: enum Kind { ControlFlow, Event, Macro, Call, Note }; enum DisplayHint { Above, Below }; @@ -416,9 +416,8 @@ public: virtual void dump() const = 0; }; - - -class PathPieces : public std::list > { + +class PathPieces : public std::list> { void flattenTo(PathPieces &Primary, PathPieces &Current, bool ShouldFlattenMacros) const; public: @@ -590,11 +589,11 @@ public: PathDiagnosticLocation getLocation() const override { return callEnter; } - - IntrusiveRefCntPtr getCallEnterEvent() const; - IntrusiveRefCntPtr - getCallEnterWithinCallerEvent() const; - IntrusiveRefCntPtr getCallExitEvent() const; + + std::shared_ptr getCallEnterEvent() const; + std::shared_ptr + getCallEnterWithinCallerEvent() const; + std::shared_ptr getCallExitEvent() const; void flattenLocations() override { callEnter.flatten(); @@ -602,11 +601,11 @@ public: for (PathPieces::iterator I = path.begin(), E = path.end(); I != E; ++I) (*I)->flattenLocations(); } - - static PathDiagnosticCallPiece *construct(const ExplodedNode *N, - const CallExitEnd &CE, - const SourceManager &SM); - + + static std::shared_ptr + construct(const ExplodedNode *N, const CallExitEnd &CE, + const SourceManager &SM); + static PathDiagnosticCallPiece *construct(PathPieces &pieces, const Decl *caller); @@ -787,7 +786,7 @@ public: assert(!Loc.isValid() && "End location already set!"); Loc = EndPiece->getLocation(); assert(Loc.isValid() && "Invalid location for end-of-path piece"); - getActivePath().push_back(EndPiece.release()); + getActivePath().push_back(std::move(EndPiece)); } void appendToDesc(StringRef S) { diff --git a/include/clang/StaticAnalyzer/Core/CheckerManager.h b/include/clang/StaticAnalyzer/Core/CheckerManager.h index 5af717d90268..0316c8fb173b 100644 --- a/include/clang/StaticAnalyzer/Core/CheckerManager.h +++ b/include/clang/StaticAnalyzer/Core/CheckerManager.h @@ -102,12 +102,12 @@ enum class ObjCMessageVisitKind { class CheckerManager { const LangOptions LangOpts; - AnalyzerOptionsRef AOptions; + AnalyzerOptions &AOptions; CheckName CurrentCheckName; public: - CheckerManager(const LangOptions &langOpts, AnalyzerOptionsRef AOptions) - : LangOpts(langOpts), AOptions(std::move(AOptions)) {} + CheckerManager(const LangOptions &langOpts, AnalyzerOptions 
&AOptions) + : LangOpts(langOpts), AOptions(AOptions) {} ~CheckerManager(); @@ -119,7 +119,7 @@ public: void finishedCheckerRegistration(); const LangOptions &getLangOpts() const { return LangOpts; } - AnalyzerOptions &getAnalyzerOptions() { return *AOptions; } + AnalyzerOptions &getAnalyzerOptions() { return AOptions; } typedef CheckerBase *CheckerRef; typedef const void *CheckerTag; diff --git a/include/clang/Tooling/Tooling.h b/include/clang/Tooling/Tooling.h index ca232f409831..10e26ac25d17 100644 --- a/include/clang/Tooling/Tooling.h +++ b/include/clang/Tooling/Tooling.h @@ -69,7 +69,8 @@ public: /// \brief Perform an action for an invocation. virtual bool - runInvocation(clang::CompilerInvocation *Invocation, FileManager *Files, + runInvocation(std::shared_ptr Invocation, + FileManager *Files, std::shared_ptr PCHContainerOps, DiagnosticConsumer *DiagConsumer) = 0; }; @@ -85,7 +86,8 @@ public: ~FrontendActionFactory() override; /// \brief Invokes the compiler with a FrontendAction created by create(). - bool runInvocation(clang::CompilerInvocation *Invocation, FileManager *Files, + bool runInvocation(std::shared_ptr Invocation, + FileManager *Files, std::shared_ptr PCHContainerOps, DiagnosticConsumer *DiagConsumer) override; @@ -261,7 +263,7 @@ public: bool runInvocation(const char *BinaryName, clang::driver::Compilation *Compilation, - clang::CompilerInvocation *Invocation, + std::shared_ptr Invocation, std::shared_ptr PCHContainerOps); std::vector CommandLine; diff --git a/lib/ARCMigrate/ARCMT.cpp b/lib/ARCMigrate/ARCMT.cpp index 680aa3e48da4..cf7cddefc03d 100644 --- a/lib/ARCMigrate/ARCMT.cpp +++ b/lib/ARCMigrate/ARCMT.cpp @@ -271,7 +271,7 @@ bool arcmt::checkForManualIssues( Diags->setClient(&errRec, /*ShouldOwnClient=*/false); std::unique_ptr Unit(ASTUnit::LoadFromCompilerInvocationAction( - CInvok.release(), PCHContainerOps, Diags)); + std::move(CInvok), PCHContainerOps, Diags)); if (!Unit) { errRec.FinishCapture(); return true; @@ -547,7 +547,7 @@ bool MigrationProcess::applyTransform(TransformFn trans, ASTAction.reset(new ARCMTMacroTrackerAction(ARCMTMacroLocs)); std::unique_ptr Unit(ASTUnit::LoadFromCompilerInvocationAction( - CInvok.release(), PCHContainerOps, Diags, ASTAction.get())); + std::move(CInvok), PCHContainerOps, Diags, ASTAction.get())); if (!Unit) { errRec.FinishCapture(); return true; diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index 1b5988d01988..d03c22af5b29 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -1458,7 +1458,9 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const { T = getPointerType(RT->getPointeeType()); } QualType BaseT = getBaseElementType(T); - if (!BaseT->isIncompleteType() && !T->isFunctionType()) { + if (T->isFunctionType()) + Align = getTypeInfoImpl(T.getTypePtr()).Align; + else if (!BaseT->isIncompleteType()) { // Adjust alignments of declarations with array type by the // large-array alignment on the target. 
if (const ArrayType *arrayType = getAsArrayType(T)) { diff --git a/lib/ASTMatchers/Dynamic/VariantValue.cpp b/lib/ASTMatchers/Dynamic/VariantValue.cpp index 8f3c70c1a8d8..f0339ed479cd 100644 --- a/lib/ASTMatchers/Dynamic/VariantValue.cpp +++ b/lib/ASTMatchers/Dynamic/VariantValue.cpp @@ -216,18 +216,20 @@ private: VariantMatcher::VariantMatcher() {} VariantMatcher VariantMatcher::SingleMatcher(const DynTypedMatcher &Matcher) { - return VariantMatcher(new SinglePayload(Matcher)); + return VariantMatcher(std::make_shared(Matcher)); } VariantMatcher VariantMatcher::PolymorphicMatcher(std::vector Matchers) { - return VariantMatcher(new PolymorphicPayload(std::move(Matchers))); + return VariantMatcher( + std::make_shared(std::move(Matchers))); } VariantMatcher VariantMatcher::VariadicOperatorMatcher( DynTypedMatcher::VariadicOperator Op, std::vector Args) { - return VariantMatcher(new VariadicOpPayload(Op, std::move(Args))); + return VariantMatcher( + std::make_shared(Op, std::move(Args))); } llvm::Optional VariantMatcher::getSingleMatcher() const { diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 85a83bca002b..4d2b3d007599 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -1751,30 +1751,57 @@ class NVPTXTargetInfo : public TargetInfo { static const char *const GCCRegNames[]; static const Builtin::Info BuiltinInfo[]; CudaArch GPU; + std::unique_ptr HostTarget; public: - NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts, + unsigned TargetPointerWidth) : TargetInfo(Triple) { + assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && + "NVPTX only supports 32- and 64-bit modes."); + TLSSupported = false; - LongWidth = LongAlign = 64; AddrSpaceMap = &NVPTXAddrSpaceMap; UseAddrSpaceMapMangling = true; + // Define available target features // These must be defined in sorted order! NoAsmVariants = true; GPU = CudaArch::SM_20; + if (TargetPointerWidth == 32) + resetDataLayout("e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"); + else + resetDataLayout("e-i64:64-v16:16-v32:32-n16:32:64"); + // If possible, get a TargetInfo for our host triple, so we can match its // types. llvm::Triple HostTriple(Opts.HostTriple); - if (HostTriple.isNVPTX()) - return; - std::unique_ptr HostTarget( - AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); + if (!HostTriple.isNVPTX()) + HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); + + // If no host target, make some guesses about the data layout and return. if (!HostTarget) { + LongWidth = LongAlign = TargetPointerWidth; + PointerWidth = PointerAlign = TargetPointerWidth; + switch (TargetPointerWidth) { + case 32: + SizeType = TargetInfo::UnsignedInt; + PtrDiffType = TargetInfo::SignedInt; + IntPtrType = TargetInfo::SignedInt; + break; + case 64: + SizeType = TargetInfo::UnsignedLong; + PtrDiffType = TargetInfo::SignedLong; + IntPtrType = TargetInfo::SignedLong; + break; + default: + llvm_unreachable("TargetPointerWidth must be 32 or 64"); + } return; } + // Copy properties from host target. 
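// [Illustrative note -- not part of the patch; the command line is only an
// example.] Keeping the host TargetInfo alive in the new HostTarget member
// lets later hooks (e.g. checkCallingConvention() further down) defer to the
// host. In a CUDA device compile such as
//
//   clang -cc1 -triple nvptx64-nvidia-cuda -aux-triple x86_64-linux-gnu ...
//
// the copying below makes the device's size_t, long double, and friends line
// up with the host's, so both halves of the translation agree on type layout.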
PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0); PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0); BoolWidth = HostTarget->getBoolWidth(); @@ -1935,6 +1962,16 @@ public: Opts.support("cl_khr_local_int32_base_atomics"); Opts.support("cl_khr_local_int32_extended_atomics"); } + + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { + // CUDA compilations support all of the host's calling conventions. + // + // TODO: We should warn if you apply a non-default CC to anything other than + // a host function. + if (HostTarget) + return HostTarget->checkCallingConvention(CC); + return CCCR_Warning; + } }; const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = { @@ -1953,31 +1990,6 @@ ArrayRef NVPTXTargetInfo::getGCCRegNames() const { return llvm::makeArrayRef(GCCRegNames); } -class NVPTX32TargetInfo : public NVPTXTargetInfo { -public: - NVPTX32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) - : NVPTXTargetInfo(Triple, Opts) { - LongWidth = LongAlign = 32; - PointerWidth = PointerAlign = 32; - SizeType = TargetInfo::UnsignedInt; - PtrDiffType = TargetInfo::SignedInt; - IntPtrType = TargetInfo::SignedInt; - resetDataLayout("e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"); - } -}; - -class NVPTX64TargetInfo : public NVPTXTargetInfo { -public: - NVPTX64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) - : NVPTXTargetInfo(Triple, Opts) { - PointerWidth = PointerAlign = 64; - SizeType = TargetInfo::UnsignedLong; - PtrDiffType = TargetInfo::SignedLong; - IntPtrType = TargetInfo::SignedLong; - resetDataLayout("e-i64:64-v16:16-v32:32-n16:32:64"); - } -}; - static const unsigned AMDGPUAddrSpaceMap[] = { 1, // opencl_global 3, // opencl_local @@ -8385,6 +8397,107 @@ public: } }; + +// AVR Target +class AVRTargetInfo : public TargetInfo { +public: + AVRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple) { + TLSSupported = false; + PointerWidth = 16; + PointerAlign = 8; + IntWidth = 16; + IntAlign = 8; + LongWidth = 32; + LongAlign = 8; + LongLongWidth = 64; + LongLongAlign = 8; + SuitableAlign = 8; + DefaultAlignForAttributeAligned = 8; + HalfWidth = 16; + HalfAlign = 8; + FloatWidth = 32; + FloatAlign = 8; + DoubleWidth = 32; + DoubleAlign = 8; + DoubleFormat = &llvm::APFloat::IEEEsingle(); + LongDoubleWidth = 32; + LongDoubleAlign = 8; + LongDoubleFormat = &llvm::APFloat::IEEEsingle(); + SizeType = UnsignedInt; + PtrDiffType = SignedInt; + IntPtrType = SignedInt; + Char16Type = UnsignedInt; + WCharType = SignedInt; + WIntType = SignedInt; + Char32Type = UnsignedLong; + SigAtomicType = SignedChar; + resetDataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + "-f32:32:32-f64:64:64-n8"); + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override { + Builder.defineMacro("__AVR__"); + } + + ArrayRef getTargetBuiltins() const override { + return None; + } + + BuiltinVaListKind getBuiltinVaListKind() const override { + return TargetInfo::VoidPtrBuiltinVaList; + } + + const char *getClobbers() const override { + return ""; + } + + ArrayRef getGCCRegNames() const override { + static const char * const GCCRegNames[] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "X", "Y", "Z", "SP" + }; + return llvm::makeArrayRef(GCCRegNames); + } + + ArrayRef getGCCRegAliases() const override { + return None; + } + + ArrayRef 
getGCCAddlRegNames() const override { + static const TargetInfo::AddlRegName AddlRegNames[] = { + { { "r26", "r27"}, 26 }, + { { "r28", "r29"}, 27 }, + { { "r30", "r31"}, 28 }, + { { "SPL", "SPH"}, 29 }, + }; + return llvm::makeArrayRef(AddlRegNames); + } + + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const override { + return false; + } + + IntType getIntTypeByWidth(unsigned BitWidth, + bool IsSigned) const final { + // AVR prefers int for 16-bit integers. + return BitWidth == 16 ? (IsSigned ? SignedInt : UnsignedInt) + : TargetInfo::getIntTypeByWidth(BitWidth, IsSigned); + } + + IntType getLeastIntTypeByWidth(unsigned BitWidth, + bool IsSigned) const final { + // AVR uses int for int_least16_t and int_fast16_t. + return BitWidth == 16 + ? (IsSigned ? SignedInt : UnsignedInt) + : TargetInfo::getLeastIntTypeByWidth(BitWidth, IsSigned); + } +}; + } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -8507,6 +8620,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new ARMbeTargetInfo(Triple, Opts); } + case llvm::Triple::avr: + return new AVRTargetInfo(Triple, Opts); case llvm::Triple::bpfeb: case llvm::Triple::bpfel: return new BPFTargetInfo(Triple, Opts); @@ -8632,9 +8747,9 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, } case llvm::Triple::nvptx: - return new NVPTX32TargetInfo(Triple, Opts); + return new NVPTXTargetInfo(Triple, Opts, /*TargetPointerWidth=*/32); case llvm::Triple::nvptx64: - return new NVPTX64TargetInfo(Triple, Opts); + return new NVPTXTargetInfo(Triple, Opts, /*TargetPointerWidth=*/64); case llvm::Triple::amdgcn: case llvm::Triple::r600: diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 164e52d7de27..ed09f3a45566 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -14,6 +14,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/Utils.h" +#include "clang/Lex/HeaderSearchOptions.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -32,6 +33,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/LTO/LTOBackend.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Passes/PassBuilder.h" @@ -61,6 +63,7 @@ namespace { class EmitAssemblyHelper { DiagnosticsEngine &Diags; + const HeaderSearchOptions &HSOpts; const CodeGenOptions &CodeGenOpts; const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; @@ -100,11 +103,14 @@ private: raw_pwrite_stream &OS); public: - EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts, + EmitAssemblyHelper(DiagnosticsEngine &_Diags, + const HeaderSearchOptions &HeaderSearchOpts, + const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, const LangOptions &LOpts, Module *M) - : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), - TheModule(M), CodeGenerationTime("codegen", "Code Generation Time") {} + : Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts), + TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), + CodeGenerationTime("codegen", "Code Generation Time") {} ~EmitAssemblyHelper() { if (CodeGenOpts.DisableFree) @@ -584,12 +590,18 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.MCOptions.MCNoExecStack = 
CodeGenOpts.NoExecStack; Options.MCOptions.MCIncrementalLinkerCompatible = CodeGenOpts.IncrementalLinkerCompatible; - Options.MCOptions.MCPIECopyRelocations = - CodeGenOpts.PIECopyRelocations; + Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; Options.MCOptions.ABIName = TargetOpts.ABI; + for (const auto &Entry : HSOpts.UserEntries) + if (!Entry.IsFramework && + (Entry.Group == frontend::IncludeDirGroup::Quoted || + Entry.Group == frontend::IncludeDirGroup::Angled || + Entry.Group == frontend::IncludeDirGroup::System)) + Options.MCOptions.IASSearchPaths.push_back( + Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel)); @@ -929,17 +941,19 @@ static void runThinLTOBackend(const CodeGenOptions &CGOpts, Module *M, } void clang::EmitBackendOutput(DiagnosticsEngine &Diags, + const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, const llvm::DataLayout &TDesc, - Module *M, BackendAction Action, + const LangOptions &LOpts, + const llvm::DataLayout &TDesc, Module *M, + BackendAction Action, std::unique_ptr OS) { if (!CGOpts.ThinLTOIndexFile.empty()) { runThinLTOBackend(CGOpts, M, std::move(OS)); return; } - EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); + EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M); if (CGOpts.ExperimentalNewPassManager) AsmHelper.EmitAssemblyWithNewPassManager(Action, std::move(OS)); diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 43ca74761fbd..4d34b3e9222f 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -35,6 +35,11 @@ using namespace clang; using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t Value, int64_t Low, int64_t High) { + return std::min(High, std::max(Low, Value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + + case PPC::BI__builtin_vsx_insertword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + + // Third argument is a compile time constant int. It must be clamped to + // to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast(Ops[2]); + assert(ArgCI && + "Third arg to xxinsertw intrinsic must be constant integer"); + const int64_t MaxIndex = 12; + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + + // The builtin semantics don't exactly match the xxinsertw instructions + // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the + // word from the first argument, and inserts it in the second argument. The + // instruction extracts the word from its second input register and inserts + // it into its first input register, so swap the first and second arguments. + std::swap(Ops[0], Ops[1]); + + // Need to cast the second argument from a vector of unsigned int to a + // vector of long long. 
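// [Worked example -- editor's sketch, not from the patch.] The index handling
// a few lines below mirrors the byte offset on little-endian targets:
// Index = MaxIndex - Index, paired with a (1, 0) shufflevector that swaps the
// two doublewords of the operand. For instance, a request to insert at byte
// offset 4 becomes offset 12 - 4 = 8 after reversal, which names the same
// logical word once the doublewords have been swapped.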
+ Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + if (getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + // Reverse the double words in the vector we will extract from. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); + + // Reverse the index. + Index = MaxIndex - Index; + } + + // Intrinsic expects the first arg to be a vector of int. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[2] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); + } + + case PPC::BI__builtin_vsx_extractuword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + + // Intrinsic expects the first argument to be a vector of doublewords. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + + // The second argument is a compile time constant int that needs to + // be clamped to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast(Ops[1]); + assert(ArgCI && + "Second Arg to xxextractuw intrinsic must be a constant integer!"); + const int64_t MaxIndex = 12; + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + + if (getTarget().isLittleEndian()) { + // Reverse the index. + Index = MaxIndex - Index; + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, Ops); + + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); + return ShuffleCall; + } else { + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); + } + } } } diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 9b96a59aec38..c7c61e0c8ecb 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -393,15 +393,13 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { // When declaring a function without a prototype, always use a // non-variadic type. - if (isa(FTy)) { - CanQual noProto = FTy.getAs(); + if (CanQual noProto = FTy.getAs()) { return arrangeLLVMFunctionInfo( noProto->getReturnType(), /*instanceMethod=*/false, /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } - assert(isa(FTy)); - return arrangeFreeFunctionType(FTy.getAs(), FD); + return arrangeFreeFunctionType(FTy.castAs(), FD); } /// Arrange the argument and result information for the declaration or diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 183201c78e36..e5e34a5f3ed6 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -604,12 +604,13 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, } if (Checks.size() > 0) { + // Make sure we're not losing information. 
Alignment needs to be a power of + // 2 + assert(!AlignVal || (uint64_t)1 << llvm::Log2_64(AlignVal) == AlignVal); llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(Loc), - EmitCheckTypeDescriptor(Ty), - llvm::ConstantInt::get(SizeTy, AlignVal), - llvm::ConstantInt::get(Int8Ty, TCK) - }; + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty), + llvm::ConstantInt::get(Int8Ty, AlignVal ? llvm::Log2_64(AlignVal) : 1), + llvm::ConstantInt::get(Int8Ty, TCK)}; EmitCheck(Checks, SanitizerHandler::TypeMismatch, StaticData, Ptr); } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 0624d86b564a..27af344fae87 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2701,14 +2701,16 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: "only required for the device " "code generation."); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = - OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); + OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, + /*Flags=*/0); ++OffloadingEntriesNum; } void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - llvm::Constant *Addr, llvm::Constant *ID) { + llvm::Constant *Addr, llvm::Constant *ID, + int32_t Flags) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { @@ -2719,9 +2721,10 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); + Entry.setFlags(Flags); return; } else { - OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); + OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; } } @@ -2888,7 +2891,8 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { } void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, - llvm::Constant *Addr, uint64_t Size) { + llvm::Constant *Addr, uint64_t Size, + int32_t Flags) { StringRef Name = Addr->getName(); auto *TgtOffloadEntryType = cast( CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); @@ -2918,6 +2922,8 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, EntryInit.add(AddrPtr); EntryInit.add(StrPtr); EntryInit.addInt(CGM.SizeTy, Size); + EntryInit.addInt(CGM.Int32Ty, Flags); + EntryInit.addInt(CGM.Int32Ty, 0); llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal(".omp_offloading.entry", Align, @@ -3090,6 +3096,8 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { // // (function or global) // char *name; // Name of the function or global. // size_t size; // Size of the entry info (0 if it a function). + // int32_t flags; // Flags associated with the entry, e.g. 'link'. + // int32_t reserved; // Reserved, to use by the runtime library. 
// }; if (TgtOffloadEntryQTy.isNull()) { ASTContext &C = CGM.getContext(); @@ -3098,6 +3106,10 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); addFieldToRecordDecl(C, RD, C.getSizeType()); + addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); + addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); RD->completeDefinition(); TgtOffloadEntryQTy = C.getRecordType(RD); } @@ -4852,7 +4864,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); + DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, + /*Flags=*/0); } /// discard all CompoundStmts intervening between two constructs diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 9057e5ec4c14..9a784dff0ae8 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -110,9 +110,9 @@ protected: CodeGenModule &CGM; /// \brief Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr and size \a Size. + /// address \a Addr, size \a Size, and flags \a Flags. virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size); + uint64_t Size, int32_t Flags = 0); /// \brief Helper to emit outlined function for 'target' directive. /// \param D Directive to emit. @@ -245,10 +245,10 @@ private: unsigned OffloadingEntriesNum; public: - /// \brief Base class of the entries info. + /// Base class of the entries info. class OffloadEntryInfo { public: - /// \brief Kind of a given entry. Currently, only target regions are + /// Kind of a given entry. Currently, only target regions are /// supported. enum OffloadingEntryInfoKinds : unsigned { // Entry is a target region. @@ -257,17 +257,24 @@ private: OFFLOAD_ENTRY_INFO_INVALID = ~0u }; - OffloadEntryInfo() : Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {} - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order) - : Order(Order), Kind(Kind) {} + OffloadEntryInfo() + : Flags(0), Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {} + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, + int32_t Flags) + : Flags(Flags), Order(Order), Kind(Kind) {} bool isValid() const { return Order != ~0u; } unsigned getOrder() const { return Order; } OffloadingEntryInfoKinds getKind() const { return Kind; } + int32_t getFlags() const { return Flags; } + void setFlags(int32_t NewFlags) { Flags = NewFlags; } static bool classof(const OffloadEntryInfo *Info) { return true; } - protected: - // \brief Order this entry was emitted. + private: + /// Flags associated with the device global. + int32_t Flags; + + /// Order this entry was emitted. 
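// [Editor's sketch -- not part of the patch; the IR below is approximate.]
// With the two fields added to __tgt_offload_entry above, each generated
// .omp_offloading.entry global now has the shape
//
//   { i8* addr, i8* name, i64 size, i32 flags, i32 0 /* reserved */ }
//
// Target regions are currently registered with Flags == 0; the field is held
// in the new Flags member below and is meant for attributes such as 'link'.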
unsigned Order; OffloadingEntryInfoKinds Kind; @@ -292,12 +299,13 @@ private: public: OffloadEntryInfoTargetRegion() - : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u), + : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u, + /*Flags=*/0), Addr(nullptr), ID(nullptr) {} explicit OffloadEntryInfoTargetRegion(unsigned Order, llvm::Constant *Addr, - llvm::Constant *ID) - : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order), + llvm::Constant *ID, int32_t Flags) + : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order, Flags), Addr(Addr), ID(ID) {} llvm::Constant *getAddress() const { return Addr; } @@ -321,8 +329,8 @@ private: /// \brief Register target region entry. void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - llvm::Constant *Addr, - llvm::Constant *ID); + llvm::Constant *Addr, llvm::Constant *ID, + int32_t Flags); /// \brief Return true if a target region entry with the provided /// information exists. bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index fe0e2acdfdbf..bc1458b1c203 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -22,14 +22,10 @@ using namespace CodeGen; namespace { enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, - /// kmp_int32 thread_limit); + /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit); OMPRTL_NVPTX__kmpc_kernel_init, -}; - -// NVPTX Address space -enum AddressSpace { - AddressSpaceShared = 3, + /// \brief Call to void __kmpc_kernel_deinit(); + OMPRTL_NVPTX__kmpc_kernel_deinit, }; } // namespace @@ -70,6 +66,15 @@ static void getNVPTXCTABarrier(CodeGenFunction &CGF) { /// Synchronize all GPU threads in a block. static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } +/// Get the value of the thread_limit clause in the teams directive. +/// The runtime encodes thread_limit in the launch parameter, always starting +/// thread_limit+warpSize threads per team. +static llvm::Value *getThreadLimit(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), + "thread_limit"); +} + /// Get the thread id of the OMP master thread. /// The master thread id is the first thread (lane) of the last warp in the /// GPU block. Warp size is assumed to be some power of 2. @@ -103,35 +108,105 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage, /* placeholder */ "_worker", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); - WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage); - WorkerFn->addFnAttr(llvm::Attribute::NoInline); } -void CGOpenMPRuntimeNVPTX::initializeEnvironment() { - // - // Initialize master-worker control state in shared memory. - // +void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen) { + EntryFunctionState EST; + WorkerFunctionState WST(CGM); + + // Emit target region as a standalone region. 
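// [Editor's sketch -- not part of the patch.] The generic kernel emitted by
// this function lays threads out roughly as follows (tid = CTA thread id,
// thread_limit = nthreads - warpsize, per getThreadLimit() above):
//
//   if (tid < thread_limit) {        // worker threads
//     worker();                      // park in the work-dispatch loop
//   } else if (tid == master_tid) {  // first lane of the last warp
//     __kmpc_kernel_init(thread_limit);
//     ... sequential part of the target region ...
//     __kmpc_kernel_deinit();        // then barrier, then exit
//   }                                // other lanes fall through to .exit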
+ class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) + : RT(RT), EST(EST), WST(WST) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitGenericEntryHeader(CGF, EST, WST); + } + void Exit(CodeGenFunction &CGF) override { + RT.emitGenericEntryFooter(CGF, EST); + } + } Action(*this, EST, WST); + CodeGen.setAction(Action); + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); - auto DL = CGM.getDataLayout(); - ActiveWorkers = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false, - llvm::GlobalValue::CommonLinkage, - llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0, - llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared); - ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty)); - - WorkID = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false, - llvm::GlobalValue::CommonLinkage, - llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0, - llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared); - WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty)); + // Create the worker function + emitWorkerFunction(WST); + + // Now change the name of the worker function to correspond to this target + // region's entry function. + WST.WorkerFn->setName(OutlinedFn->getName() + "_worker"); +} + +// Setup NVPTX threads for master-worker OpenMP scheme. +void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, + EntryFunctionState &EST, + WorkerFunctionState &WST) { + CGBuilderTy &Bld = CGF.Builder; + + llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker"); + llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); + llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); + EST.ExitBB = CGF.createBasicBlock(".exit"); + + auto *IsWorker = + Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF)); + Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); + + CGF.EmitBlock(WorkerBB); + CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None); + CGF.EmitBranch(EST.ExitBB); + + CGF.EmitBlock(MasterCheckBB); + auto *IsMaster = + Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); + Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB); + + CGF.EmitBlock(MasterBB); + // First action in sequential region: + // Initialize the state of the OpenMP runtime library on the GPU. + llvm::Value *Args[] = {getThreadLimit(CGF)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); +} + +void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF, + EntryFunctionState &EST) { + if (!EST.ExitBB) + EST.ExitBB = CGF.createBasicBlock(".exit"); + + llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier"); + CGF.EmitBranch(TerminateBB); + + CGF.EmitBlock(TerminateBB); + // Signal termination condition. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None); + // Barrier to terminate worker threads. + syncCTAThreads(CGF); + // Master thread jumps to exit point. 
+ CGF.EmitBranch(EST.ExitBB); + + CGF.EmitBlock(EST.ExitBB); + EST.ExitBB = nullptr; } void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { auto &Ctx = CGM.getContext(); CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {}); emitWorkerLoop(CGF, WST); CGF.FinishFunction(); @@ -163,21 +238,26 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.EmitBlock(AwaitBB); // Wait for parallel work syncCTAThreads(CGF); + + Address WorkFn = + CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn"); + Address ExecStatus = + CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status"); + CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0)); + CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); + + // TODO: Call into runtime to get parallel work. + // On termination condition (workid == 0), exit loop. - llvm::Value *ShouldTerminate = Bld.CreateICmpEQ( - Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()), - llvm::Constant::getNullValue(WorkID->getType()->getElementType()), - "should_terminate"); + llvm::Value *ShouldTerminate = + Bld.CreateIsNull(Bld.CreateLoad(WorkFn), "should_terminate"); Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB); // Activate requested workers. CGF.EmitBlock(SelectWorkersBB); - llvm::Value *ThreadID = getNVPTXThreadID(CGF); - llvm::Value *ActiveThread = Bld.CreateICmpSLT( - ThreadID, - Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()), - "active_thread"); - Bld.CreateCondBr(ActiveThread, ExecuteBB, BarrierBB); + llvm::Value *IsActive = + Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active"); + Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB); // Signal start of parallel region. CGF.EmitBlock(ExecuteBB); @@ -197,72 +277,6 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.EmitBlock(ExitBB); } -// Setup NVPTX threads for master-worker OpenMP scheme. -void CGOpenMPRuntimeNVPTX::emitEntryHeader(CodeGenFunction &CGF, - EntryFunctionState &EST, - WorkerFunctionState &WST) { - CGBuilderTy &Bld = CGF.Builder; - - // Get the master thread id. - llvm::Value *MasterID = getMasterThreadID(CGF); - // Current thread's identifier. - llvm::Value *ThreadID = getNVPTXThreadID(CGF); - - // Setup BBs in entry function. - llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker"); - llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker"); - llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); - EST.ExitBB = CGF.createBasicBlock(".exit"); - - // The head (master thread) marches on while its body of companion threads in - // the warp go to sleep. - llvm::Value *ShouldDie = - Bld.CreateICmpUGT(ThreadID, MasterID, "excess_in_master_warp"); - Bld.CreateCondBr(ShouldDie, EST.ExitBB, WorkerCheckBB); - - // Select worker threads... - CGF.EmitBlock(WorkerCheckBB); - llvm::Value *IsWorker = Bld.CreateICmpULT(ThreadID, MasterID, "is_worker"); - Bld.CreateCondBr(IsWorker, WorkerBB, MasterBB); - - // ... and send to worker loop, awaiting parallel invocation. - CGF.EmitBlock(WorkerBB); - CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None); - CGF.EmitBranch(EST.ExitBB); - - // Only master thread executes subsequent serial code. - CGF.EmitBlock(MasterBB); - - // First action in sequential region: - // Initialize the state of the OpenMP runtime library on the GPU. 
- llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), - Args); -} - -void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF, - EntryFunctionState &EST) { - if (!EST.ExitBB) - EST.ExitBB = CGF.createBasicBlock(".exit"); - - CGBuilderTy &Bld = CGF.Builder; - llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier"); - CGF.EmitBranch(TerminateBB); - - CGF.EmitBlock(TerminateBB); - // Signal termination condition. - Bld.CreateAlignedStore( - llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID, - WorkID->getAlignment()); - // Barrier to terminate worker threads. - syncCTAThreads(CGF); - // Master thread jumps to exit point. - CGF.EmitBranch(EST.ExitBB); - - CGF.EmitBlock(EST.ExitBB); - EST.ExitBB = nullptr; -} - /// \brief Returns specified OpenMP runtime function for the current OpenMP /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. @@ -272,21 +286,27 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::Constant *RTLFn = nullptr; switch (static_cast(Function)) { case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 omp_handle, - // kmp_int32 thread_limit); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty}; + // Build void __kmpc_kernel_init(kmp_int32 thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); break; } + case OMPRTL_NVPTX__kmpc_kernel_deinit: { + // Build void __kmpc_kernel_deinit(); + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, {}, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); + break; + } } return RTLFn; } void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size) { + uint64_t Size, int32_t) { auto *F = dyn_cast(Addr); // TODO: Add support for global variables on the device after declare target // support. @@ -315,44 +335,14 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( assert(!ParentName.empty() && "Invalid target region parent name!"); - EntryFunctionState EST; - WorkerFunctionState WST(CGM); - - // Emit target region as a standalone region. - class NVPTXPrePostActionTy : public PrePostActionTy { - CGOpenMPRuntimeNVPTX &RT; - CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; - CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; - - public: - NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, - CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, - CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) - : RT(RT), EST(EST), WST(WST) {} - void Enter(CodeGenFunction &CGF) override { - RT.emitEntryHeader(CGF, EST, WST); - } - void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); } - } Action(*this, EST, WST); - CodeGen.setAction(Action); - emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, - IsOffloadEntry, CodeGen); - - // Create the worker function - emitWorkerFunction(WST); - - // Now change the name of the worker function to correspond to this target - // region's entry function. 
- WST.WorkerFn->setName(OutlinedFn->getName() + "_worker"); + emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + CodeGen); } CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM), ActiveWorkers(nullptr), WorkID(nullptr) { + : CGOpenMPRuntime(CGM) { if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP NVPTX can only handle device code."); - - // Called once per module during initialization. - initializeEnvironment(); } void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index a33fb27579f6..63a02965a5bd 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -24,7 +24,7 @@ namespace clang { namespace CodeGen { class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { -public: +private: struct EntryFunctionState { llvm::BasicBlock *ExitBB = nullptr; }; @@ -40,34 +40,21 @@ public: void createWorkerFunction(CodeGenModule &CGM); }; - /// \brief Helper for target entry function. Guide the master and worker - /// threads to their respective locations. - void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, - WorkerFunctionState &WST); - - /// \brief Signal termination of OMP execution. - void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); - -private: - // - // Private state and methods. - // - - // Master-worker control state. - // Number of requested OMP threads in parallel region. - llvm::GlobalVariable *ActiveWorkers; - // Outlined function for the workers to execute. - llvm::GlobalVariable *WorkID; - - /// \brief Initialize master-worker control state. - void initializeEnvironment(); - /// \brief Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); /// \brief Helper for worker function. Emit body of worker loop. void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); + /// \brief Helper for generic target entry function. Guide the master and + /// worker threads to their respective locations. + void emitGenericEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + WorkerFunctionState &WST); + + /// \brief Signal termination of OMP execution for generic target entry + /// function. + void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + /// \brief Returns specified OpenMP runtime function for the current OpenMP /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. @@ -79,9 +66,23 @@ private: // /// \brief Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr and size \a Size. + /// address \a Addr, size \a Size, and flags \a Flags. void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size) override; + uint64_t Size, int32_t Flags = 0) override; + + /// \brief Emit outlined function specialized for the Fork-Join + /// programming model for applicable target directives on the NVPTX device. + /// \param D Directive to emit. + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// An outlined function may not be an entry if, e.g. the if clause always + /// evaluates to false. 
+ void emitGenericKernel(const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); /// \brief Emit outlined function for 'target' directive on the NVPTX /// device. diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 1e17918df4a4..5f74141d75b3 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -44,6 +44,7 @@ namespace clang { virtual void anchor(); DiagnosticsEngine &Diags; BackendAction Action; + const HeaderSearchOptions &HeaderSearchOpts; const CodeGenOptions &CodeGenOpts; const TargetOptions &TargetOpts; const LangOptions &LangOpts; @@ -77,8 +78,8 @@ namespace clang { const SmallVectorImpl> &LinkModules, std::unique_ptr OS, LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) - : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts), - TargetOpts(TargetOpts), LangOpts(LangOpts), + : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), + CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(std::move(OS)), Context(nullptr), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), @@ -225,8 +226,8 @@ namespace clang { EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); - EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - C.getTargetInfo().getDataLayout(), + EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, + LangOpts, C.getTargetInfo().getDataLayout(), getModule(), Action, std::move(AsmOutStream)); Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); @@ -898,9 +899,10 @@ void CodeGenAction::ExecuteAction() { Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler, &CI.getDiagnostics()); - EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts, - CI.getLangOpts(), CI.getTarget().getDataLayout(), - TheModule.get(), BA, std::move(OS)); + EmitBackendOutput(CI.getDiagnostics(), CI.getHeaderSearchOpts(), + CI.getCodeGenOpts(), TargetOpts, CI.getLangOpts(), + CI.getTarget().getDataLayout(), TheModule.get(), BA, + std::move(OS)); return; } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 1347f54df9ac..05522cd40024 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -120,7 +120,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \ SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \ SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \ - SANITIZER_CHECK(TypeMismatch, type_mismatch, 0) \ + SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \ SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0) enum SanitizerHandler { diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index baf7811eedaf..754f9968b67f 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -282,7 +282,7 @@ public: // Print the IR for the PCH container to the debug output. llvm::SmallString<0> Buffer; clang::EmitBackendOutput( - Diags, CodeGenOpts, TargetOpts, LangOpts, + Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayout(), M.get(), BackendAction::Backend_EmitLL, llvm::make_unique(Buffer)); @@ -290,9 +290,10 @@ public: }); // Use the LLVM backend to emit the pch container. 
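// [Editor's note -- a sketch, not part of the patch.] EmitBackendOutput()
// gained a HeaderSearchOptions parameter so CreateTargetMachine() can seed
// MCOptions.IASSearchPaths from the quoted/angled/system user entries (see
// the BackendUtil.cpp hunk earlier); the integrated assembler can then
// resolve .include-style references against the -I search path. Every call
// site, including the two in this file, picks up the extra argument:
//
//   EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts,
//                     LangOpts, DataLayout, M, Action, std::move(OS));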
- clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - Ctx.getTargetInfo().getDataLayout(), M.get(), - BackendAction::Backend_EmitObj, std::move(OS)); + clang::EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, + LangOpts, Ctx.getTargetInfo().getDataLayout(), + M.get(), BackendAction::Backend_EmitObj, + std::move(OS)); // Free the memory for the temporary buffer. llvm::SmallVector Empty; diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 391eb53d2500..d2fc3888ef29 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -871,6 +871,14 @@ static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { return NumMembers <= 4; } +/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. +static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { + auto AI = ABIArgInfo::getDirect(T); + AI.setInReg(true); + AI.setCanBeFlattened(false); + return AI; +} + //===----------------------------------------------------------------------===// // X86-32 ABI Implementation //===----------------------------------------------------------------------===// @@ -884,6 +892,11 @@ struct CCState { unsigned FreeSSERegs; }; +enum { + // Vectorcall only allows the first 6 parameters to be passed in registers. + VectorcallMaxParamNumAsReg = 6 +}; + /// X86_32ABIInfo - The X86-32 ABI information. class X86_32ABIInfo : public SwiftABIInfo { enum Class { @@ -929,6 +942,8 @@ class X86_32ABIInfo : public SwiftABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; + ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, + const ABIArgInfo& current) const; /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -946,6 +961,8 @@ class X86_32ABIInfo : public SwiftABIInfo { void addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const; + void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, + bool &UsedInAlloca) const; public: @@ -1494,6 +1511,27 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } +ABIArgInfo +X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State, + const ABIArgInfo ¤t) const { + // Assumes vectorCall calling convention. + const Type *Base = nullptr; + uint64_t NumElts = 0; + + if (!Ty->isBuiltinType() && !Ty->isVectorType() && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + // HVA types get passed directly in registers if there is room. + State.FreeSSERegs -= NumElts; + return getDirectX86Hva(); + } + // If there's no room, the HVA gets passed as normal indirect + // structure. + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + return current; +} + ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. @@ -1513,19 +1551,34 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } // vectorcall adds the concept of a homogenous vector aggregate, similar - // to other targets. + // to other targets, regcall uses some of the HVA rules. 
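// [Illustrative example -- editor's addition; HVA4 is a made-up name.] A
// homogeneous vector aggregate is a struct whose members all have the same
// floating-point or vector type, e.g.
//
//   struct HVA4 { __m128 a, b, c, d; };  // four <4 x float> members
//
// Under vectorcall (below) such a type is passed in XMM registers only on the
// second classification pass, after every non-HVA argument has had its
// chance; under regcall it takes registers immediately when enough are free.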
const Type *Base = nullptr; uint64_t NumElts = 0; if ((State.CC == llvm::CallingConv::X86_VectorCall || State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - if (Ty->isBuiltinType() || Ty->isVectorType()) + + if (State.CC == llvm::CallingConv::X86_RegCall) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + if (Ty->isBuiltinType() || Ty->isVectorType()) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getExpand(); + + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (State.CC == llvm::CallingConv::X86_VectorCall) { + if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) { + // Actual floating-point types get registers first time through if + // there is registers available + State.FreeSSERegs -= NumElts; return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); + } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { + // HVA Types only get registers after everything else has been + // set, so it gets set as indirect for now. + return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty)); + } } - return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { @@ -1604,6 +1657,36 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getDirect(); } +void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, + bool &UsedInAlloca) const { + // Vectorcall only allows the first 6 parameters to be passed in registers, + // and homogeneous vector aggregates are only put into registers as a second + // priority. + unsigned Count = 0; + CCState ZeroState = State; + ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0; + // HVAs must be done as a second priority for registers, so the deferred + // items are dealt with by going through the pattern a second time. + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = classifyArgumentType(I.type, State); + else + // Parameters after the 6th cannot be passed in registers, + // so pretend there are no registers left for them. + I.info = classifyArgumentType(I.type, ZeroState); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + ++Count; + } + Count = 0; + // Go through the arguments a second time to get HVAs registers if there + // are still some available. + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = reclassifyHvaArgType(I.type, State, I.info); + ++Count; + } +} + void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI.getCallingConvention()); if (IsMCUABI) @@ -1638,9 +1721,14 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { ++State.FreeRegs; bool UsedInAlloca = false; - for (auto &I : FI.arguments()) { - I.info = classifyArgumentType(I.type, State); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + if (State.CC == llvm::CallingConv::X86_VectorCall) { + computeVectorCallArgs(FI, State, UsedInAlloca); + } else { + // If not vectorcall, revert to normal behavior. 
+ for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } // If we needed to use inalloca for any argument, do a second pass and rewrite @@ -2070,10 +2158,14 @@ public: } private: - ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const; - - bool IsMingw64; + ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType, + bool IsVectorCall, bool IsRegCall) const; + ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, + const ABIArgInfo ¤t) const; + void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs, + bool IsVectorCall, bool IsRegCall) const; + + bool IsMingw64; }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -3679,8 +3771,24 @@ Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, /*allowHigherAlign*/ false); } +ABIArgInfo +WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, + const ABIArgInfo ¤t) const { + // Assumes vectorCall calling convention. + const Type *Base = nullptr; + uint64_t NumElts = 0; + + if (!Ty->isBuiltinType() && !Ty->isVectorType() && + isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) { + FreeSSERegs -= NumElts; + return getDirectX86Hva(); + } + return current; +} + ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const { + bool IsReturnType, bool IsVectorCall, + bool IsRegCall) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); @@ -3704,21 +3812,34 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, } - // vectorcall adds the concept of a homogenous vector aggregate, similar to - // other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (FreeSSERegs >= NumElts) { - FreeSSERegs -= NumElts; - if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) + // vectorcall adds the concept of a homogenous vector aggregate, similar to + // other targets. + if ((IsVectorCall || IsRegCall) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (IsRegCall) { + if (FreeSSERegs >= NumElts) { + FreeSSERegs -= NumElts; + if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getExpand(); + } + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } else if (IsVectorCall) { + if (FreeSSERegs >= NumElts && + (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) { + FreeSSERegs -= NumElts; return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); + } else if (IsReturnType) { + return ABIArgInfo::getExpand(); + } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { + // HVAs are delayed and reclassified in the 2nd step. + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } } - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } - if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. 
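// [Editor's summary -- a sketch, not part of the patch.] The 32- and 64-bit
// vectorcall paths now share the same two-pass scheme:
//
//   pass 1: classify arguments normally; HVAs that are not plain builtin or
//           vector types are provisionally passed indirectly, and only the
//           first VectorcallMaxParamNumAsReg (6) parameters may use registers;
//   pass 2: revisit the provisional HVAs (reclassifyHvaArgType) and promote
//           them to getDirectX86Hva() if enough SSE registers remain.
//
// This appears to match MSVC's rule that HVAs receive registers only after
// all other register candidates have been placed.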
@@ -3754,6 +3875,32 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(); } +void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, + unsigned FreeSSERegs, + bool IsVectorCall, + bool IsRegCall) const { + unsigned Count = 0; + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); + else { + // Since these cannot be passed in registers, pretend no registers + // are left. + unsigned ZeroSSERegsAvail = 0; + I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false, + IsVectorCall, IsRegCall); + } + ++Count; + } + + Count = 0; + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); + ++Count; + } +} + void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; @@ -3769,17 +3916,24 @@ void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { } if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); + FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true, + IsVectorCall, IsRegCall); if (IsVectorCall) { // We can use up to 6 SSE register parameters with vectorcall. FreeSSERegs = 6; } else if (IsRegCall) { + // RegCall gives us 16 SSE registers, we can reuse the return registers. FreeSSERegs = 16; } - for (auto &I : FI.arguments()) - I.info = classify(I.type, FreeSSERegs, false); + if (IsVectorCall) { + computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall); + } else { + for (auto &I : FI.arguments()) + I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); + } + } Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp index 7bd43ac9da2f..15f830d029eb 100644 --- a/lib/Driver/Driver.cpp +++ b/lib/Driver/Driver.cpp @@ -3764,6 +3764,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::wasm64: TC = new toolchains::WebAssembly(*this, Target, Args); break; + case llvm::Triple::avr: + TC = new toolchains::AVRToolChain(*this, Target, Args); + break; default: if (Target.getVendor() == llvm::Triple::Myriad) TC = new toolchains::MyriadToolChain(*this, Target, Args); diff --git a/lib/Driver/MSVCToolChain.cpp b/lib/Driver/MSVCToolChain.cpp index 95cf056f7a74..17fd6ac6f714 100644 --- a/lib/Driver/MSVCToolChain.cpp +++ b/lib/Driver/MSVCToolChain.cpp @@ -47,9 +47,9 @@ using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; -MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple& Triple, +MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : ToolChain(D, Triple, Args) { + : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); if (getDriver().getInstalledDir() != getDriver().Dir) getProgramPaths().push_back(getDriver().Dir); @@ -94,6 +94,15 @@ bool MSVCToolChain::isPICDefaultForced() const { return getArch() == llvm::Triple::x86_64; } +void MSVCToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); +} + +void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const { + CudaInstallation.print(OS); +} + #ifdef USE_WIN32 
static bool readFullStringValue(HKEY hkey, const char *valueName, std::string &value) { diff --git a/lib/Driver/MinGWToolChain.cpp b/lib/Driver/MinGWToolChain.cpp index 938440b08f60..e971869fb569 100644 --- a/lib/Driver/MinGWToolChain.cpp +++ b/lib/Driver/MinGWToolChain.cpp @@ -20,10 +20,9 @@ using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; -namespace { // Simplified from Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple. -bool findGccVersion(StringRef LibDir, std::string &GccLibDir, - std::string &Ver) { +static bool findGccVersion(StringRef LibDir, std::string &GccLibDir, + std::string &Ver) { Generic_GCC::GCCVersion Version = Generic_GCC::GCCVersion::Parse("0.0.0"); std::error_code EC; for (llvm::sys::fs::directory_iterator LI(LibDir, EC), LE; !EC && LI != LE; @@ -40,7 +39,6 @@ bool findGccVersion(StringRef LibDir, std::string &GccLibDir, } return Ver.size(); } -} void MinGW::findGccLibDir() { llvm::SmallVector, 2> Archs; @@ -63,7 +61,7 @@ void MinGW::findGccLibDir() { } MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : ToolChain(D, Triple, Args) { + : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); // In Windows there aren't any standard install locations, we search @@ -135,6 +133,15 @@ bool MinGW::UseSEHExceptions() const { return getArch() == llvm::Triple::x86_64; } +void MinGW::AddCudaIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); +} + +void MinGW::printVerboseInfo(raw_ostream &OS) const { + CudaInstallation.print(OS); +} + // Include directories for various hosts: // Windows, mingw.org diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp index 968b0cb4724a..789a2f0525be 100644 --- a/lib/Driver/ToolChains.cpp +++ b/lib/Driver/ToolChains.cpp @@ -1805,19 +1805,26 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) { } CudaInstallationDetector::CudaInstallationDetector( - const Driver &D, const llvm::Triple &TargetTriple, + const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args) : D(D) { SmallVector CudaPathCandidates; - if (Args.hasArg(options::OPT_cuda_path_EQ)) + // In decreasing order so we prefer newer versions to older versions. + std::initializer_list Versions = {"8.0", "7.5", "7.0"}; + + if (Args.hasArg(options::OPT_cuda_path_EQ)) { CudaPathCandidates.push_back( Args.getLastArgValue(options::OPT_cuda_path_EQ)); - else { + } else if (HostTriple.isOSWindows()) { + for (const char *Ver : Versions) + CudaPathCandidates.push_back( + D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + + Ver); + } else { CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda"); - CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0"); - CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.5"); - CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.0"); + for (const char *Ver : Versions) + CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver); } for (const auto &CudaPath : CudaPathCandidates) { @@ -1840,7 +1847,7 @@ CudaInstallationDetector::CudaInstallationDetector( // It's sufficient for our purposes to be flexible: If both lib and lib64 // exist, we choose whichever one matches our triple. Otherwise, if only // lib exists, we use it. 
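// [Worked example -- editor's addition.] With the candidate list above, a
// Windows host now probes, newest first,
//
//   <sysroot>/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0
//   <sysroot>/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v7.5
//   <sysroot>/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v7.0
//
// while other hosts try /usr/local/cuda and then /usr/local/cuda-8.0, -7.5,
// -7.0. The first candidate with a plausible layout wins, and the lib-vs-lib64
// choice below is now keyed to the host triple rather than the device triple,
// presumably because those directories follow the host's library layout.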
- if (TargetTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64")) + if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64")) LibPath = InstallPath + "/lib64"; else if (FS.exists(InstallPath + "/lib")) LibPath = InstallPath + "/lib"; @@ -4870,7 +4877,7 @@ Tool *DragonFly::buildLinker() const { CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) : ToolChain(D, Triple, Args), HostTC(HostTC), - CudaInstallation(D, Triple, Args) { + CudaInstallation(D, HostTC.getTriple(), Args) { if (CudaInstallation.isValid()) getProgramPaths().push_back(CudaInstallation.getBinPath()); } @@ -5021,6 +5028,11 @@ SanitizerMask CudaToolChain::getSupportedSanitizers() const { return HostTC.getSupportedSanitizers(); } +VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D, + const ArgList &Args) const { + return HostTC.computeMSVCVersion(D, Args); +} + /// XCore tool chain XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) @@ -5318,3 +5330,12 @@ SanitizerMask Contiki::getSupportedSanitizers() const { Res |= SanitizerKind::SafeStack; return Res; } + +/// AVR Toolchain +AVRToolChain::AVRToolChain(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : Generic_ELF(D, Triple, Args) { } +Tool *AVRToolChain::buildLinker() const { + return new tools::AVR::Linker(*this); +} +// End AVR diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h index 7dab08915d48..3240357ba6b1 100644 --- a/lib/Driver/ToolChains.h +++ b/lib/Driver/ToolChains.h @@ -43,7 +43,7 @@ private: mutable llvm::SmallSet ArchsWithVersionTooLowErrors; public: - CudaInstallationDetector(const Driver &D, const llvm::Triple &Triple, + CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args); void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, @@ -709,12 +709,19 @@ public: const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + void printVerboseInfo(raw_ostream &OS) const override; + protected: Tool *getTool(Action::ActionClass AC) const override; Tool *buildLinker() const override; Tool *buildAssembler() const override; private: + CudaInstallationDetector CudaInstallation; + std::string Base; std::string GccLibDir; std::string Ver; @@ -892,6 +899,10 @@ public: CudaToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const llvm::opt::ArgList &Args); + virtual const llvm::Triple *getAuxTriple() const override { + return &HostTC.getTriple(); + } + llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; @@ -924,6 +935,10 @@ public: SanitizerMask getSupportedSanitizers() const override; + VersionTuple + computeMSVCVersion(const Driver *D, + const llvm::opt::ArgList &Args) const override; + const ToolChain &HostTC; CudaInstallationDetector CudaInstallation; @@ -1147,6 +1162,9 @@ public: const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + bool getWindowsSDKDir(std::string &path, int &major, std::string &windowsSDKIncludeVersion, std::string &windowsSDKLibVersion) const; @@ -1166,6 +1184,8 @@ public: types::ID InputType) 
const override; SanitizerMask getSupportedSanitizers() const override; + void printVerboseInfo(raw_ostream &OS) const override; + protected: void AddSystemIncludeWithSubfolder(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, @@ -1179,6 +1199,8 @@ protected: private: VersionTuple getMSVCVersionFromTriple() const; VersionTuple getMSVCVersionFromExe() const; + + CudaInstallationDetector CudaInstallation; }; class LLVM_LIBRARY_VISIBILITY CrossWindowsToolChain : public Generic_GCC { @@ -1349,6 +1371,16 @@ public: SanitizerMask getSupportedSanitizers() const override; }; +class LLVM_LIBRARY_VISIBILITY AVRToolChain : public Generic_ELF { +protected: + Tool *buildLinker() const override; +public: + AVRToolChain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); + bool IsIntegratedAssemblerDefault() const override { return true; } +}; + + } // end namespace toolchains } // end namespace driver } // end namespace clang diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp index ea5ad7d051b6..8e02d45fcc4a 100644 --- a/lib/Driver/Tools.cpp +++ b/lib/Driver/Tools.cpp @@ -4086,13 +4086,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; - bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment(); - bool IsWindowsCygnus = - getToolChain().getTriple().isWindowsCygwinEnvironment(); - bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); - bool IsPS4CPU = getToolChain().getTriple().isPS4CPU(); - bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); - // Check number of inputs for sanity. We need at least one input. assert(Inputs.size() >= 1 && "Must have at least one input."); const InputInfo &Input = Inputs[0]; @@ -4106,6 +4099,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Inputs.size() == 1) && "Unable to handle multiple inputs."); + bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment(); + bool IsWindowsCygnus = + getToolChain().getTriple().isWindowsCygwinEnvironment(); + bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); + bool IsPS4CPU = getToolChain().getTriple().isPS4CPU(); + bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); + + // Adjust IsWindowsXYZ for CUDA compilations. Even when compiling in device + // mode (i.e., getToolchain().getTriple() is NVPTX, not Windows), we need to + // pass Windows-specific flags to cc1. + if (IsCuda) { + const llvm::Triple *AuxTriple = getToolChain().getAuxTriple(); + IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment(); + IsWindowsGNU |= AuxTriple && AuxTriple->isWindowsGNUEnvironment(); + IsWindowsCygnus |= AuxTriple && AuxTriple->isWindowsCygwinEnvironment(); + } + // C++ is not supported for IAMCU. 
if (IsIAMCU && types::isCXX(Input.getType())) D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU"; @@ -12191,3 +12201,19 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } + +void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + + std::string Linker = getToolChain().GetProgramPath(getShortName()); + ArgStringList CmdArgs; + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), + CmdArgs, Inputs)); +} +// AVR tools end. diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h index 98dcf841169e..9d5b892d424c 100644 --- a/lib/Driver/Tools.h +++ b/lib/Driver/Tools.h @@ -990,6 +990,19 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool { } // end namespace NVPTX +namespace AVR { +class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool { +public: + Linker(const ToolChain &TC) : GnuTool("AVR::Linker", "avr-ld", TC) {} + bool hasIntegratedCPP() const override { return false; } + bool isLinkJob() const override { return true; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; +} // end namespace AVR + } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/lib/Frontend/ASTUnit.cpp b/lib/Frontend/ASTUnit.cpp index 32ce966f798e..d8929969e6c1 100644 --- a/lib/Frontend/ASTUnit.cpp +++ b/lib/Frontend/ASTUnit.cpp @@ -245,7 +245,7 @@ ASTUnit::~ASTUnit() { // perform this operation here because we explicitly request that the // compiler instance *not* free these buffers for each invocation of the // parser. - if (Invocation.get() && OwnsRemappedFileBuffers) { + if (Invocation && OwnsRemappedFileBuffers) { PreprocessorOptions &PPOpts = Invocation->getPreprocessorOpts(); for (const auto &RB : PPOpts.RemappedFileBuffers) delete RB.second; @@ -257,7 +257,9 @@ ASTUnit::~ASTUnit() { fprintf(stderr, "--- %u translation units\n", --ActiveASTUnitObjects); } -void ASTUnit::setPreprocessor(Preprocessor *pp) { PP = pp; } +void ASTUnit::setPreprocessor(std::shared_ptr PP) { + this->PP = std::move(PP); +} /// \brief Determine the set of code-completion contexts in which this /// declaration should be shown. @@ -346,7 +348,7 @@ void ASTUnit::CacheCodeCompletionResults() { // Gather the set of global code completions. 
typedef CodeCompletionResult Result; SmallVector Results; - CachedCompletionAllocator = new GlobalCodeCompletionAllocator; + CachedCompletionAllocator = std::make_shared(); CodeCompletionTUInfo CCTUInfo(CachedCompletionAllocator); TheSema->GatherGlobalCodeCompletions(*CachedCompletionAllocator, CCTUInfo, Results); @@ -675,7 +677,7 @@ std::unique_ptr ASTUnit::LoadFromASTFile( AST->SourceMgr = new SourceManager(AST->getDiagnostics(), AST->getFileManager(), UserFilesAreVolatile); - AST->HSOpts = new HeaderSearchOptions(); + AST->HSOpts = std::make_shared(); AST->HSOpts->ModuleFormat = PCHContainerRdr.getFormat(); AST->HeaderInfo.reset(new HeaderSearch(AST->HSOpts, AST->getSourceManager(), @@ -683,7 +685,7 @@ std::unique_ptr ASTUnit::LoadFromASTFile( AST->ASTFileLangOpts, /*Target=*/nullptr)); - PreprocessorOptions *PPOpts = new PreprocessorOptions(); + auto PPOpts = std::make_shared(); for (const auto &RemappedFile : RemappedFiles) PPOpts->addRemappedFile(RemappedFile.first, RemappedFile.second); @@ -693,11 +695,11 @@ std::unique_ptr ASTUnit::LoadFromASTFile( HeaderSearch &HeaderInfo = *AST->HeaderInfo; unsigned Counter; - AST->PP = - new Preprocessor(PPOpts, AST->getDiagnostics(), AST->ASTFileLangOpts, - AST->getSourceManager(), HeaderInfo, *AST, - /*IILookup=*/nullptr, - /*OwnsHeaderSearch=*/false); + AST->PP = std::make_shared( + std::move(PPOpts), AST->getDiagnostics(), AST->ASTFileLangOpts, + AST->getSourceManager(), HeaderInfo, *AST, + /*IILookup=*/nullptr, + /*OwnsHeaderSearch=*/false); Preprocessor &PP = *AST->PP; AST->Ctx = new ASTContext(AST->ASTFileLangOpts, AST->getSourceManager(), @@ -926,7 +928,7 @@ public: const Preprocessor &PP, StringRef isysroot, std::unique_ptr Out) : PCHGenerator(PP, "", isysroot, std::make_shared(), - ArrayRef>(), + ArrayRef>(), /*AllowASTWithErrors=*/true), Unit(Unit), Hash(Unit.getCurrentTopLevelHashValue()), Action(Action), Out(std::move(Out)) { @@ -1046,10 +1048,7 @@ bool ASTUnit::Parse(std::shared_ptr PCHContainerOps, llvm::CrashRecoveryContextCleanupRegistrar CICleanup(Clang.get()); - IntrusiveRefCntPtr - CCInvocation(new CompilerInvocation(*Invocation)); - - Clang->setInvocation(CCInvocation.get()); + Clang->setInvocation(std::make_shared(*Invocation)); OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile(); // Set up diagnostics, capturing any diagnostics that would @@ -1342,8 +1341,8 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild, unsigned MaxLines) { - IntrusiveRefCntPtr - PreambleInvocation(new CompilerInvocation(PreambleInvocationIn)); + auto PreambleInvocation = + std::make_shared(PreambleInvocationIn); FrontendOptions &FrontendOpts = PreambleInvocation->getFrontendOpts(); PreprocessorOptions &PreprocessorOpts = PreambleInvocation->getPreprocessorOpts(); @@ -1521,7 +1520,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( llvm::CrashRecoveryContextCleanupRegistrar CICleanup(Clang.get()); - Clang->setInvocation(&*PreambleInvocation); + Clang->setInvocation(std::move(PreambleInvocation)); OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile(); // Set up diagnostics, capturing all of the diagnostics produced. 
@@ -1671,7 +1670,7 @@ void ASTUnit::transferASTDataFromCompilerInstance(CompilerInstance &CI) { if (CI.hasASTContext()) Ctx = &CI.getASTContext(); if (CI.hasPreprocessor()) - PP = &CI.getPreprocessor(); + PP = CI.getPreprocessorPtr(); CI.setSourceManager(nullptr); CI.setFileManager(nullptr); if (CI.hasTarget()) @@ -1707,30 +1706,29 @@ StringRef ASTUnit::getASTFileName() const { return Mod.FileName; } -ASTUnit *ASTUnit::create(CompilerInvocation *CI, - IntrusiveRefCntPtr Diags, - bool CaptureDiagnostics, - bool UserFilesAreVolatile) { - std::unique_ptr AST; - AST.reset(new ASTUnit(false)); +std::unique_ptr +ASTUnit::create(std::shared_ptr CI, + IntrusiveRefCntPtr Diags, + bool CaptureDiagnostics, bool UserFilesAreVolatile) { + std::unique_ptr AST(new ASTUnit(false)); ConfigureDiags(Diags, *AST, CaptureDiagnostics); - AST->Diagnostics = Diags; - AST->Invocation = CI; - AST->FileSystemOpts = CI->getFileSystemOpts(); IntrusiveRefCntPtr VFS = createVFSFromCompilerInvocation(*CI, *Diags); if (!VFS) return nullptr; + AST->Diagnostics = Diags; + AST->FileSystemOpts = CI->getFileSystemOpts(); + AST->Invocation = std::move(CI); AST->FileMgr = new FileManager(AST->FileSystemOpts, VFS); AST->UserFilesAreVolatile = UserFilesAreVolatile; AST->SourceMgr = new SourceManager(AST->getDiagnostics(), *AST->FileMgr, UserFilesAreVolatile); - return AST.release(); + return AST; } ASTUnit *ASTUnit::LoadFromCompilerInvocationAction( - CompilerInvocation *CI, + std::shared_ptr CI, std::shared_ptr PCHContainerOps, IntrusiveRefCntPtr Diags, FrontendAction *Action, ASTUnit *Unit, bool Persistent, StringRef ResourceFilesPath, @@ -1744,7 +1742,7 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction( ASTUnit *AST = Unit; if (!AST) { // Create the AST unit. - OwnAST.reset(create(CI, Diags, CaptureDiagnostics, UserFilesAreVolatile)); + OwnAST = create(CI, Diags, CaptureDiagnostics, UserFilesAreVolatile); AST = OwnAST.get(); if (!AST) return nullptr; @@ -1783,7 +1781,7 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction( llvm::CrashRecoveryContextCleanupRegistrar CICleanup(Clang.get()); - Clang->setInvocation(CI); + Clang->setInvocation(std::move(CI)); AST->OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile(); // Set up diagnostics, capturing any diagnostics that would @@ -1901,7 +1899,7 @@ bool ASTUnit::LoadFromCompilerInvocation( } std::unique_ptr ASTUnit::LoadFromCompilerInvocation( - CompilerInvocation *CI, + std::shared_ptr CI, std::shared_ptr PCHContainerOps, IntrusiveRefCntPtr Diags, FileManager *FileMgr, bool OnlyLocalDecls, bool CaptureDiagnostics, @@ -1918,7 +1916,7 @@ std::unique_ptr ASTUnit::LoadFromCompilerInvocation( AST->ShouldCacheCodeCompletionResults = CacheCodeCompletionResults; AST->IncludeBriefCommentsInCodeCompletion = IncludeBriefCommentsInCodeCompletion; - AST->Invocation = CI; + AST->Invocation = std::move(CI); AST->FileSystemOpts = FileMgr->getFileSystemOpts(); AST->FileMgr = FileMgr; AST->UserFilesAreVolatile = UserFilesAreVolatile; @@ -1950,8 +1948,8 @@ ASTUnit *ASTUnit::LoadFromCommandLine( assert(Diags.get() && "no DiagnosticsEngine was provided"); SmallVector StoredDiagnostics; - - IntrusiveRefCntPtr CI; + + std::shared_ptr CI; { @@ -1959,8 +1957,7 @@ ASTUnit *ASTUnit::LoadFromCommandLine( StoredDiagnostics); CI = clang::createInvocationFromCommandLine( - llvm::makeArrayRef(ArgBegin, ArgEnd), - Diags); + llvm::makeArrayRef(ArgBegin, ArgEnd), Diags); if (!CI) return nullptr; } @@ -2331,8 +2328,7 @@ void ASTUnit::CodeComplete( CompletionTimer.setOutput("Code completion @ " + File 
+ ":" + Twine(Line) + ":" + Twine(Column)); - IntrusiveRefCntPtr - CCInvocation(new CompilerInvocation(*Invocation)); + auto CCInvocation = std::make_shared(*Invocation); FrontendOptions &FrontendOpts = CCInvocation->getFrontendOpts(); CodeCompleteOptions &CodeCompleteOpts = FrontendOpts.CodeCompleteOpts; @@ -2364,7 +2360,8 @@ void ASTUnit::CodeComplete( llvm::CrashRecoveryContextCleanupRegistrar CICleanup(Clang.get()); - Clang->setInvocation(&*CCInvocation); + auto &Inv = *CCInvocation; + Clang->setInvocation(std::move(CCInvocation)); OriginalSourceFile = Clang->getFrontendOpts().Inputs[0].getFile(); // Set up diagnostics, capturing any diagnostics produced. @@ -2372,8 +2369,8 @@ void ASTUnit::CodeComplete( CaptureDroppedDiagnostics Capture(true, Clang->getDiagnostics(), StoredDiagnostics); - ProcessWarningOptions(Diag, CCInvocation->getDiagnosticOpts()); - + ProcessWarningOptions(Diag, Inv.getDiagnosticOpts()); + // Create the target instance. Clang->setTarget(TargetInfo::CreateTargetInfo( Clang->getDiagnostics(), Clang->getInvocation().TargetOpts)); @@ -2429,7 +2426,7 @@ void ASTUnit::CodeComplete( if (!llvm::sys::fs::getUniqueID(MainPath, MainID)) { if (CompleteFileID == MainID && Line > 1) OverrideMainBuffer = getMainBufferWithPrecompiledPreamble( - PCHContainerOps, *CCInvocation, false, Line - 1); + PCHContainerOps, Inv, false, Line - 1); } } } diff --git a/lib/Frontend/ChainedIncludesSource.cpp b/lib/Frontend/ChainedIncludesSource.cpp index c5b77ee90e56..b984c2ed0dd5 100644 --- a/lib/Frontend/ChainedIncludesSource.cpp +++ b/lib/Frontend/ChainedIncludesSource.cpp @@ -147,7 +147,7 @@ IntrusiveRefCntPtr clang::createChainedIncludesSource( std::unique_ptr Clang( new CompilerInstance(CI.getPCHContainerOperations())); - Clang->setInvocation(CInvok.release()); + Clang->setInvocation(std::move(CInvok)); Clang->setDiagnostics(Diags.get()); Clang->setTarget(TargetInfo::CreateTargetInfo( Clang->getDiagnostics(), Clang->getInvocation().TargetOpts)); @@ -159,7 +159,7 @@ IntrusiveRefCntPtr clang::createChainedIncludesSource( Clang->createASTContext(); auto Buffer = std::make_shared(); - ArrayRef> Extensions; + ArrayRef> Extensions; auto consumer = llvm::make_unique( Clang->getPreprocessor(), "-", /*isysroot=*/"", Buffer, Extensions, /*AllowASTWithErrors=*/true); diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp index ccddd14f0f34..afcaa6e87878 100644 --- a/lib/Frontend/CompilerInstance.cpp +++ b/lib/Frontend/CompilerInstance.cpp @@ -66,8 +66,9 @@ CompilerInstance::~CompilerInstance() { assert(OutputFiles.empty() && "Still output files in flight?"); } -void CompilerInstance::setInvocation(CompilerInvocation *Value) { - Invocation = Value; +void CompilerInstance::setInvocation( + std::shared_ptr Value) { + Invocation = std::move(Value); } bool CompilerInstance::shouldBuildGlobalModuleIndex() const { @@ -96,7 +97,9 @@ void CompilerInstance::setSourceManager(SourceManager *Value) { SourceMgr = Value; } -void CompilerInstance::setPreprocessor(Preprocessor *Value) { PP = Value; } +void CompilerInstance::setPreprocessor(std::shared_ptr Value) { + PP = std::move(Value); +} void CompilerInstance::setASTContext(ASTContext *Value) { Context = Value; @@ -365,14 +368,13 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { PTHMgr = PTHManager::Create(PPOpts.TokenCache, getDiagnostics()); // Create the Preprocessor. 
- HeaderSearch *HeaderInfo = new HeaderSearch(&getHeaderSearchOpts(), - getSourceManager(), - getDiagnostics(), - getLangOpts(), - &getTarget()); - PP = new Preprocessor(&getPreprocessorOpts(), getDiagnostics(), getLangOpts(), - getSourceManager(), *HeaderInfo, *this, PTHMgr, - /*OwnsHeaderSearch=*/true, TUKind); + HeaderSearch *HeaderInfo = + new HeaderSearch(getHeaderSearchOptsPtr(), getSourceManager(), + getDiagnostics(), getLangOpts(), &getTarget()); + PP = std::make_shared( + Invocation->getPreprocessorOptsPtr(), getDiagnostics(), getLangOpts(), + getSourceManager(), *HeaderInfo, *this, PTHMgr, + /*OwnsHeaderSearch=*/true, TUKind); PP->Initialize(getTarget(), getAuxTarget()); // Note that this is different then passing PTHMgr to Preprocessor's ctor. @@ -498,7 +500,7 @@ IntrusiveRefCntPtr CompilerInstance::createPCHExternalASTSource( StringRef Path, StringRef Sysroot, bool DisablePCHValidation, bool AllowPCHWithCompilerErrors, Preprocessor &PP, ASTContext &Context, const PCHContainerReader &PCHContainerRdr, - ArrayRef> Extensions, + ArrayRef> Extensions, void *DeserializationListener, bool OwnDeserializationListener, bool Preamble, bool UseGlobalModuleIndex) { HeaderSearchOptions &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts(); @@ -1018,8 +1020,8 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, = ImportingInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap(); // Construct a compiler invocation for creating this module. - IntrusiveRefCntPtr Invocation - (new CompilerInvocation(ImportingInstance.getInvocation())); + auto Invocation = + std::make_shared(ImportingInstance.getInvocation()); PreprocessorOptions &PPOpts = Invocation->getPreprocessorOpts(); @@ -1049,7 +1051,8 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, PreprocessorOptions &ImportingPPOpts = ImportingInstance.getInvocation().getPreprocessorOpts(); if (!ImportingPPOpts.FailedModules) - ImportingPPOpts.FailedModules = new PreprocessorOptions::FailedModulesSet; + ImportingPPOpts.FailedModules = + std::make_shared(); PPOpts.FailedModules = ImportingPPOpts.FailedModules; // If there is a module map file, build the module using the module map. @@ -1074,7 +1077,8 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, // module. CompilerInstance Instance(ImportingInstance.getPCHContainerOperations(), /*BuildingModule=*/true); - Instance.setInvocation(&*Invocation); + auto &Inv = *Invocation; + Instance.setInvocation(std::move(Invocation)); Instance.createDiagnostics(new ForwardingDiagnosticConsumer( ImportingInstance.getDiagnosticClient()), @@ -1096,7 +1100,7 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, // between all of the module CompilerInstances. Other than that, we don't // want to produce any dependency output from the module build. Instance.setModuleDepCollector(ImportingInstance.getModuleDepCollector()); - Invocation->getDependencyOutputOpts() = DependencyOutputOptions(); + Inv.getDependencyOutputOpts() = DependencyOutputOptions(); // Get or create the module map that we'll use to build this module. 
std::string InferredModuleMapContent; diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index ca4a7655a37d..93bbcc42da1a 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -60,12 +60,11 @@ CompilerInvocationBase::CompilerInvocationBase() PreprocessorOpts(new PreprocessorOptions()) {} CompilerInvocationBase::CompilerInvocationBase(const CompilerInvocationBase &X) - : RefCountedBase(), - LangOpts(new LangOptions(*X.getLangOpts())), - TargetOpts(new TargetOptions(X.getTargetOpts())), - DiagnosticOpts(new DiagnosticOptions(X.getDiagnosticOpts())), - HeaderSearchOpts(new HeaderSearchOptions(X.getHeaderSearchOpts())), - PreprocessorOpts(new PreprocessorOptions(X.getPreprocessorOpts())) {} + : LangOpts(new LangOptions(*X.getLangOpts())), + TargetOpts(new TargetOptions(X.getTargetOpts())), + DiagnosticOpts(new DiagnosticOptions(X.getDiagnosticOpts())), + HeaderSearchOpts(new HeaderSearchOptions(X.getHeaderSearchOpts())), + PreprocessorOpts(new PreprocessorOptions(X.getPreprocessorOpts())) {} CompilerInvocationBase::~CompilerInvocationBase() {} @@ -1214,8 +1213,8 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, // Add the testing module file extension. Opts.ModuleFileExtensions.push_back( - new TestModuleFileExtension(BlockName, MajorVersion, MinorVersion, - Hashed, UserInfo)); + std::make_shared( + BlockName, MajorVersion, MinorVersion, Hashed, UserInfo)); } if (const Arg *A = Args.getLastArg(OPT_code_completion_at)) { diff --git a/lib/Frontend/CreateInvocationFromCommandLine.cpp b/lib/Frontend/CreateInvocationFromCommandLine.cpp index 1e9e57afb6bd..16269064b6e1 100644 --- a/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -30,9 +30,9 @@ using namespace llvm::opt; /// /// \return A CompilerInvocation, or 0 if none was built for the given /// argument vector. -CompilerInvocation * -clang::createInvocationFromCommandLine(ArrayRef ArgList, - IntrusiveRefCntPtr Diags) { +std::unique_ptr clang::createInvocationFromCommandLine( + ArrayRef ArgList, + IntrusiveRefCntPtr Diags) { if (!Diags.get()) { // No diagnostics engine was provided, so create our own diagnostics object // with the default options. @@ -93,12 +93,12 @@ clang::createInvocationFromCommandLine(ArrayRef ArgList, } const ArgStringList &CCArgs = Cmd.getArguments(); - std::unique_ptr CI(new CompilerInvocation()); + auto CI = llvm::make_unique(); if (!CompilerInvocation::CreateFromArgs(*CI, const_cast(CCArgs.data()), const_cast(CCArgs.data()) + CCArgs.size(), *Diags)) return nullptr; - return CI.release(); + return CI; } diff --git a/lib/Frontend/FrontendAction.cpp b/lib/Frontend/FrontendAction.cpp index e871b310302d..39fc1371a9ef 100644 --- a/lib/Frontend/FrontendAction.cpp +++ b/lib/Frontend/FrontendAction.cpp @@ -224,7 +224,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, // file, otherwise the CompilerInstance will happily destroy them. 
CI.setFileManager(&AST->getFileManager()); CI.setSourceManager(&AST->getSourceManager()); - CI.setPreprocessor(&AST->getPreprocessor()); + CI.setPreprocessor(AST->getPreprocessorPtr()); CI.setASTContext(&AST->getASTContext()); setCurrentInput(Input, std::move(AST)); diff --git a/lib/Frontend/SerializedDiagnosticPrinter.cpp b/lib/Frontend/SerializedDiagnosticPrinter.cpp index 1ea5a342e1d8..7f88c919e24a 100644 --- a/lib/Frontend/SerializedDiagnosticPrinter.cpp +++ b/lib/Frontend/SerializedDiagnosticPrinter.cpp @@ -143,7 +143,7 @@ class SDiagsWriter : public DiagnosticConsumer { struct SharedState; - explicit SDiagsWriter(IntrusiveRefCntPtr State) + explicit SDiagsWriter(std::shared_ptr State) : LangOpts(nullptr), OriginalInstance(false), MergeChildRecords(false), State(std::move(State)) {} @@ -151,7 +151,7 @@ public: SDiagsWriter(StringRef File, DiagnosticOptions *Diags, bool MergeChildRecords) : LangOpts(nullptr), OriginalInstance(true), MergeChildRecords(MergeChildRecords), - State(new SharedState(File, Diags)) { + State(std::make_shared(File, Diags)) { if (MergeChildRecords) RemoveOldDiagnostics(); EmitPreamble(); @@ -251,7 +251,7 @@ private: /// \brief State that is shared among the various clones of this diagnostic /// consumer. - struct SharedState : RefCountedBase { + struct SharedState { SharedState(StringRef File, DiagnosticOptions *Diags) : DiagOpts(Diags), Stream(Buffer), OutputFile(File.str()), EmittedAnyDiagBlocks(false) {} @@ -299,7 +299,7 @@ private: }; /// \brief State shared among the various clones of this diagnostic consumer. - IntrusiveRefCntPtr State; + std::shared_ptr State; }; } // end anonymous namespace @@ -422,15 +422,15 @@ void SDiagsWriter::EmitPreamble() { EmitMetaBlock(); } -static void AddSourceLocationAbbrev(llvm::BitCodeAbbrev *Abbrev) { +static void AddSourceLocationAbbrev(llvm::BitCodeAbbrev &Abbrev) { using namespace llvm; - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // File ID. - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line. - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column. - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Offset; + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // File ID. + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line. + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column. + Abbrev.Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Offset; } -static void AddRangeLocationAbbrev(llvm::BitCodeAbbrev *Abbrev) { +static void AddRangeLocationAbbrev(llvm::BitCodeAbbrev &Abbrev) { AddSourceLocationAbbrev(Abbrev); AddSourceLocationAbbrev(Abbrev); } @@ -449,7 +449,7 @@ void SDiagsWriter::EmitBlockInfoBlock() { EmitBlockID(BLOCK_META, "Meta", Stream, Record); EmitRecordID(RECORD_VERSION, "Version", Stream, Record); - BitCodeAbbrev *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_VERSION)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrevs.set(RECORD_VERSION, Stream.EmitBlockInfoAbbrev(BLOCK_META, Abbrev)); @@ -467,10 +467,10 @@ void SDiagsWriter::EmitBlockInfoBlock() { EmitRecordID(RECORD_FIXIT, "FixIt", Stream, Record); // Emit abbreviation for RECORD_DIAG. - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_DIAG)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Diag level. 
- AddSourceLocationAbbrev(Abbrev); + AddSourceLocationAbbrev(*Abbrev); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // Category. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // Mapped Diag ID. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Text size. @@ -478,7 +478,7 @@ void SDiagsWriter::EmitBlockInfoBlock() { Abbrevs.set(RECORD_DIAG, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG, Abbrev)); // Emit abbrevation for RECORD_CATEGORY. - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_CATEGORY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Category ID. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // Text size. @@ -486,14 +486,14 @@ void SDiagsWriter::EmitBlockInfoBlock() { Abbrevs.set(RECORD_CATEGORY, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG, Abbrev)); // Emit abbrevation for RECORD_SOURCE_RANGE. - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_SOURCE_RANGE)); - AddRangeLocationAbbrev(Abbrev); + AddRangeLocationAbbrev(*Abbrev); Abbrevs.set(RECORD_SOURCE_RANGE, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG, Abbrev)); // Emit the abbreviation for RECORD_DIAG_FLAG. - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_DIAG_FLAG)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // Mapped Diag ID. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Text size. @@ -502,7 +502,7 @@ void SDiagsWriter::EmitBlockInfoBlock() { Abbrev)); // Emit the abbreviation for RECORD_FILENAME. - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_FILENAME)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // Mapped file ID. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Size. @@ -513,9 +513,9 @@ void SDiagsWriter::EmitBlockInfoBlock() { Abbrev)); // Emit the abbreviation for RECORD_FIXIT. - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(RECORD_FIXIT)); - AddRangeLocationAbbrev(Abbrev); + AddRangeLocationAbbrev(*Abbrev); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Text size. Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // FixIt text. Abbrevs.set(RECORD_FIXIT, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG, diff --git a/lib/Frontend/TestModuleFileExtension.cpp b/lib/Frontend/TestModuleFileExtension.cpp index b43d45f7ae46..294f7e44cee5 100644 --- a/lib/Frontend/TestModuleFileExtension.cpp +++ b/lib/Frontend/TestModuleFileExtension.cpp @@ -24,11 +24,11 @@ void TestModuleFileExtension::Writer::writeExtensionContents( using namespace llvm; // Write an abbreviation for this record. - BitCodeAbbrev *Abv = new llvm::BitCodeAbbrev(); + auto Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(FIRST_EXTENSION_RECORD_ID)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of characters Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // message - auto Abbrev = Stream.EmitAbbrev(Abv); + auto Abbrev = Stream.EmitAbbrev(std::move(Abv)); // Write a message into the extension block. 
SmallString<64> Message; diff --git a/lib/Headers/__clang_cuda_cmath.h b/lib/Headers/__clang_cuda_cmath.h index 0eaa08b30cab..9bef82611aa4 100644 --- a/lib/Headers/__clang_cuda_cmath.h +++ b/lib/Headers/__clang_cuda_cmath.h @@ -72,6 +72,10 @@ __DEVICE__ int fpclassify(double __x) { __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } + +// For inscrutable reasons, the CUDA headers define these functions for us on +// Windows. +#ifndef _MSC_VER __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } @@ -79,6 +83,10 @@ __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } // __finitef, does not exist when compiling for MacOS. __isfinited is available // everywhere and is just as good. __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } +__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } +#endif + __DEVICE__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } @@ -109,8 +117,6 @@ __DEVICE__ bool islessgreater(float __x, float __y) { __DEVICE__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } -__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isunordered(float __x, float __y) { diff --git a/lib/Headers/__clang_cuda_intrinsics.h b/lib/Headers/__clang_cuda_intrinsics.h index 3df41fa290d3..b43ce21d0bb3 100644 --- a/lib/Headers/__clang_cuda_intrinsics.h +++ b/lib/Headers/__clang_cuda_intrinsics.h @@ -35,50 +35,50 @@ #pragma push_macro("__MAKE_SHUFFLES") #define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask) \ - inline __device__ int __FnName(int __in, int __offset, \ + inline __device__ int __FnName(int __val, int __offset, \ int __width = warpSize) { \ - return __IntIntrinsic(__in, __offset, \ + return __IntIntrinsic(__val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ - inline __device__ float __FnName(float __in, int __offset, \ + inline __device__ float __FnName(float __val, int __offset, \ int __width = warpSize) { \ - return __FloatIntrinsic(__in, __offset, \ + return __FloatIntrinsic(__val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ - inline __device__ unsigned int __FnName(unsigned int __in, int __offset, \ + inline __device__ unsigned int __FnName(unsigned int __val, int __offset, \ int __width = warpSize) { \ return static_cast( \ - ::__FnName(static_cast(__in), __offset, __width)); \ + ::__FnName(static_cast(__val), __offset, __width)); \ } \ - inline __device__ long long __FnName(long long __in, int __offset, \ + inline __device__ long long __FnName(long long __val, int __offset, \ int __width = warpSize) { \ struct __Bits { \ int __a, __b; \ }; \ - _Static_assert(sizeof(__in) == sizeof(__Bits)); \ + _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ - memcpy(&__in, &__tmp, sizeof(__in)); \ + memcpy(&__val, &__tmp, sizeof(__val)); \ __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \ - long long __out; \ - memcpy(&__out, &__tmp, sizeof(__tmp)); \ - return 
__out; \ + long long __ret; \ + memcpy(&__ret, &__tmp, sizeof(__tmp)); \ + return __ret; \ } \ inline __device__ unsigned long long __FnName( \ - unsigned long long __in, int __offset, int __width = warpSize) { \ - return static_cast( \ - ::__FnName(static_cast(__in), __offset, __width)); \ + unsigned long long __val, int __offset, int __width = warpSize) { \ + return static_cast(::__FnName( \ + static_cast(__val), __offset, __width)); \ } \ - inline __device__ double __FnName(double __in, int __offset, \ + inline __device__ double __FnName(double __val, int __offset, \ int __width = warpSize) { \ long long __tmp; \ - _Static_assert(sizeof(__tmp) == sizeof(__in)); \ - memcpy(&__tmp, &__in, sizeof(__in)); \ + _Static_assert(sizeof(__tmp) == sizeof(__val)); \ + memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp = ::__FnName(__tmp, __offset, __width); \ - double __out; \ - memcpy(&__out, &__tmp, sizeof(__out)); \ - return __out; \ + double __ret; \ + memcpy(&__ret, &__tmp, sizeof(__ret)); \ + return __ret; \ } __MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f); diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index d1d1d8026325..a8618816d5bb 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -12574,6 +12574,9 @@ static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, int __b) { #ifdef __POWER9_VECTOR__ +#define vec_insert4b __builtin_vsx_insertword +#define vec_extract4b __builtin_vsx_extractuword + /* vec_extract_exp */ static __inline__ vector unsigned int __ATTRS_o_ai diff --git a/lib/Headers/intrin.h b/lib/Headers/intrin.h index 7c91ebaee8cb..a35262af846a 100644 --- a/lib/Headers/intrin.h +++ b/lib/Headers/intrin.h @@ -65,7 +65,6 @@ static __inline__ void __cpuid(int[4], int); static __inline__ void __cpuidex(int[4], int, int); -void __debugbreak(void); static __inline__ __int64 __emul(int, int); static __inline__ @@ -109,10 +108,6 @@ void __outdword(unsigned short, unsigned long); void __outdwordstring(unsigned short, unsigned long *, unsigned long); void __outword(unsigned short, unsigned short); void __outwordstring(unsigned short, unsigned short *, unsigned long); -static __inline__ -unsigned int __popcnt(unsigned int); -static __inline__ -unsigned short __popcnt16(unsigned short); unsigned long __readcr0(void); unsigned long __readcr2(void); static __inline__ @@ -124,8 +119,6 @@ unsigned int __readdr(unsigned int); static __inline__ unsigned char __readfsbyte(unsigned long); static __inline__ -unsigned long __readfsdword(unsigned long); -static __inline__ unsigned __int64 __readfsqword(unsigned long); static __inline__ unsigned short __readfsword(unsigned long); @@ -179,108 +172,34 @@ static __inline__ unsigned char _bittestandreset(long *, long); static __inline__ unsigned char _bittestandset(long *, long); -unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); -unsigned long __cdecl _byteswap_ulong(unsigned long); -unsigned short __cdecl _byteswap_ushort(unsigned short); void __cdecl _disable(void); void __cdecl _enable(void); long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); -static __inline__ -long _InterlockedAnd(long volatile *_Value, long _Mask); -static __inline__ -short _InterlockedAnd16(short volatile *_Value, short _Mask); -static __inline__ -char _InterlockedAnd8(char volatile *_Value, char _Mask); unsigned char _interlockedbittestandreset(long volatile *, long); static __inline__ unsigned char _interlockedbittestandset(long volatile *, long); -static __inline__ -long __cdecl 
_InterlockedCompareExchange(long volatile *_Destination, - long _Exchange, long _Comparand); long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); -static __inline__ -short _InterlockedCompareExchange16(short volatile *_Destination, - short _Exchange, short _Comparand); -static __inline__ -__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination, - __int64 _Exchange, __int64 _Comparand); __int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, __int64); __int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, __int64); -static __inline__ -char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, - char _Comparand); void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *); void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, void *); -static __inline__ -long __cdecl _InterlockedDecrement(long volatile *_Addend); -static __inline__ -short _InterlockedDecrement16(short volatile *_Addend); -long _InterlockedExchange(long volatile *_Target, long _Value); -static __inline__ -short _InterlockedExchange16(short volatile *_Target, short _Value); -static __inline__ -char _InterlockedExchange8(char volatile *_Target, char _Value); -static __inline__ -long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value); long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); long _InterlockedExchangeAdd_HLERelease(long volatile *, long); -static __inline__ -short _InterlockedExchangeAdd16(short volatile *_Addend, short _Value); __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); __int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); -static __inline__ -char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value); -static __inline__ -long __cdecl _InterlockedIncrement(long volatile *_Addend); -static __inline__ -short _InterlockedIncrement16(short volatile *_Addend); -static __inline__ -long _InterlockedOr(long volatile *_Value, long _Mask); -static __inline__ -short _InterlockedOr16(short volatile *_Value, short _Mask); -static __inline__ -char _InterlockedOr8(char volatile *_Value, char _Mask); -static __inline__ -long _InterlockedXor(long volatile *_Value, long _Mask); -static __inline__ -short _InterlockedXor16(short volatile *_Value, short _Mask); -static __inline__ -char _InterlockedXor8(char volatile *_Value, char _Mask); void __cdecl _invpcid(unsigned int, void *); -static __inline__ -unsigned long __cdecl _lrotl(unsigned long, int); -static __inline__ -unsigned long __cdecl _lrotr(unsigned long, int); static __inline__ void __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadBarrier(void); static __inline__ void __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadWriteBarrier(void); -static __inline__ -void *_ReturnAddress(void); unsigned int _rorx_u32(unsigned int, const unsigned int); -static __inline__ -unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); -static __inline__ -unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); -static __inline__ -unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); -static __inline__ -unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); -static __inline__ -unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); -static __inline__ -unsigned short 
_rotr16(unsigned short _Value, unsigned char _Shift); -static __inline__ -unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); -static __inline__ -unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); int _sarx_i32(int, unsigned int); #if __STDC_HOSTED__ int __cdecl _setjmp(jmp_buf); @@ -318,8 +237,6 @@ unsigned __int64 __lzcnt64(unsigned __int64); static __inline__ void __movsq(unsigned long long *, unsigned long long const *, size_t); static __inline__ -unsigned __int64 __popcnt64(unsigned __int64); -static __inline__ unsigned char __readgsbyte(unsigned long); static __inline__ unsigned long __readgsdword(unsigned long); @@ -357,7 +274,6 @@ static __inline__ unsigned char _bittestandreset64(__int64 *, __int64); static __inline__ unsigned char _bittestandset64(__int64 *, __int64); -unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); long _InterlockedAnd_np(long volatile *_Value, long _Mask); short _InterlockedAnd16_np(short volatile *_Value, short _Mask); __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); @@ -383,11 +299,8 @@ __int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, __int64); __int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); -void *_InterlockedCompareExchangePointer(void *volatile *_Destination, - void *_Exchange, void *_Comparand); void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, void *_Exchange, void *_Comparand); -void *_InterlockedExchangePointer(void *volatile *_Target, void *_Value); long _InterlockedOr_np(long volatile *_Value, long _Mask); short _InterlockedOr16_np(short volatile *_Value, short _Mask); __int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); @@ -398,9 +311,6 @@ __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedXor8_np(char volatile *_Value, char _Mask); unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); __int64 _sarx_i64(__int64, unsigned int); -#if __STDC_HOSTED__ -int __cdecl _setjmpex(jmp_buf); -#endif unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); static __inline__ diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index b5228fc6c8cb..fa2a76ef47ca 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -54,7 +54,7 @@ HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) { ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {} -HeaderSearch::HeaderSearch(IntrusiveRefCntPtr HSOpts, +HeaderSearch::HeaderSearch(std::shared_ptr HSOpts, SourceManager &SourceMgr, DiagnosticsEngine &Diags, const LangOptions &LangOpts, const TargetInfo *Target) diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 0f7473b8c1ff..91319bedd6f0 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -68,7 +68,7 @@ LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) //===----------------------------------------------------------------------===// ExternalPreprocessorSource::~ExternalPreprocessorSource() { } -Preprocessor::Preprocessor(IntrusiveRefCntPtr PPOpts, +Preprocessor::Preprocessor(std::shared_ptr PPOpts, DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index ad4005747310..ba24adefe6b0 100644 --- a/lib/Parse/ParseDecl.cpp +++ 
b/lib/Parse/ParseDecl.cpp @@ -177,8 +177,12 @@ void Parser::ParseGNUAttributes(ParsedAttributes &attrs, if (!ClassStack.empty() && !LateAttrs->parseSoon()) getCurrentClass().LateParsedDeclarations.push_back(LA); - // consume everything up to and including the matching right parens - ConsumeAndStoreUntil(tok::r_paren, LA->Toks, true, false); + // Be sure ConsumeAndStoreUntil doesn't see the start l_paren, since it + // recursively consumes balanced parens. + LA->Toks.push_back(Tok); + ConsumeParen(); + // Consume everything up to and including the matching right parens. + ConsumeAndStoreUntil(tok::r_paren, LA->Toks, /*StopAtSemi=*/true); Token Eof; Eof.startToken(); diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index caf2320f8fc1..55b5ff498574 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -2751,6 +2751,7 @@ void Parser::ParseBlockId(SourceLocation CaretLoc) { // Parse the block-declarator. Declarator DeclaratorInfo(DS, Declarator::BlockLiteralContext); + DeclaratorInfo.setFunctionDefinitionKind(FDK_Definition); ParseDeclarator(DeclaratorInfo); MaybeParseGNUAttributes(DeclaratorInfo); @@ -2789,6 +2790,7 @@ ExprResult Parser::ParseBlockLiteralExpression() { // Parse the return type if present. DeclSpec DS(AttrFactory); Declarator ParamInfo(DS, Declarator::BlockLiteralContext); + ParamInfo.setFunctionDefinitionKind(FDK_Definition); // FIXME: Since the return type isn't actually parsed, it can't be used to // fill ParamInfo with an initial valid range, so do it manually. ParamInfo.SetSourceRange(SourceRange(Tok.getLocation(), Tok.getLocation())); diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp index 2dc6a0739bc8..89733237c153 100644 --- a/lib/Parse/ParsePragma.cpp +++ b/lib/Parse/ParsePragma.cpp @@ -506,10 +506,12 @@ void Parser::HandlePragmaOpenCLExtension() { // overriding all previously issued extension directives, but only if the // behavior is set to disable." if (Name == "all") { - if (State == Disable) + if (State == Disable) { Opt.disableAll(); - else + Opt.enableSupportedCore(getLangOpts().OpenCLVersion); + } else { PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; + } } else if (State == Begin) { if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts().OpenCLVersion)) { diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp index 3eef366b75b3..94cfc4baca51 100644 --- a/lib/Sema/SemaCodeComplete.cpp +++ b/lib/Sema/SemaCodeComplete.cpp @@ -3720,9 +3720,17 @@ static void AddObjCProperties( Builder.AddPlaceholderChunk( Builder.getAllocator().CopyString(PlaceholderStr)); + // When completing blocks properties that return void the default + // property completion result should show up before the setter, + // otherwise the setter completion should show up before the default + // property completion, as we normally want to use the result of the + // call. Results.MaybeAddResult( Result(Builder.TakeString(), P, - Results.getBasePriority(P) + CCD_BlockPropertySetter), + Results.getBasePriority(P) + + (BlockLoc.getTypePtr()->getReturnType()->isVoidType() + ? 
CCD_BlockPropertySetter + : -CCD_BlockPropertySetter)), CurContext); } }; diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index 084bd4c45eda..a650621b573a 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -5395,6 +5395,26 @@ static void ReferenceDllExportedMethods(Sema &S, CXXRecordDecl *Class) { } } +static void checkForMultipleExportedDefaultConstructors(Sema &S, CXXRecordDecl *Class) { + CXXConstructorDecl *LastExportedDefaultCtor = nullptr; + for (Decl *Member : Class->decls()) { + // Look for exported default constructors. + auto *CD = dyn_cast(Member); + if (!CD || !CD->isDefaultConstructor() || !CD->hasAttr()) + continue; + + if (LastExportedDefaultCtor) { + S.Diag(LastExportedDefaultCtor->getLocation(), + diag::err_attribute_dll_ambiguous_default_ctor) + << Class; + S.Diag(CD->getLocation(), diag::note_entity_declared_at) + << CD->getDeclName(); + return; + } + LastExportedDefaultCtor = CD; + } +} + /// \brief Check class-level dllimport/dllexport attribute. void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) { Attr *ClassAttr = getDLLAttr(Class); @@ -10362,64 +10382,11 @@ void Sema::ActOnFinishCXXMemberDecls() { DelayedExceptionSpecChecks.clear(); return; } - } -} - -static void checkDefaultArgExprsForConstructors(Sema &S, CXXRecordDecl *Class) { - // Don't do anything for template patterns. - if (Class->getDescribedClassTemplate()) - return; - - CallingConv ExpectedCallingConv = S.Context.getDefaultCallingConvention( - /*IsVariadic=*/false, /*IsCXXMethod=*/true); - - CXXConstructorDecl *LastExportedDefaultCtor = nullptr; - for (Decl *Member : Class->decls()) { - auto *CD = dyn_cast(Member); - if (!CD) { - // Recurse on nested classes. - if (auto *NestedRD = dyn_cast(Member)) - checkDefaultArgExprsForConstructors(S, NestedRD); - continue; - } else if (!CD->isDefaultConstructor() || !CD->hasAttr()) { - continue; - } - - CallingConv ActualCallingConv = - CD->getType()->getAs()->getCallConv(); - - // Skip default constructors with typical calling conventions and no default - // arguments. - unsigned NumParams = CD->getNumParams(); - if (ExpectedCallingConv == ActualCallingConv && NumParams == 0) - continue; - - if (LastExportedDefaultCtor) { - S.Diag(LastExportedDefaultCtor->getLocation(), - diag::err_attribute_dll_ambiguous_default_ctor) << Class; - S.Diag(CD->getLocation(), diag::note_entity_declared_at) - << CD->getDeclName(); - return; - } - LastExportedDefaultCtor = CD; - - for (unsigned I = 0; I != NumParams; ++I) { - (void)S.CheckCXXDefaultArgExpr(Class->getLocation(), CD, - CD->getParamDecl(I)); - S.DiscardCleanupsInEvaluationContext(); - } + checkForMultipleExportedDefaultConstructors(*this, Record); } } void Sema::ActOnFinishCXXNonNestedClass(Decl *D) { - auto *RD = dyn_cast(D); - - // Default constructors that are annotated with __declspec(dllexport) which - // have default arguments or don't use the standard calling convention are - // wrapped with a thunk called the default constructor closure. - if (RD && Context.getTargetInfo().getCXXABI().isMicrosoft()) - checkDefaultArgExprsForConstructors(*this, RD); - referenceDLLExportedClassMethods(); } diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 3c554c9a5244..1509b22a9e5a 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -2777,6 +2777,9 @@ bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, /// were not overloaded, and it doesn't promise that the declaration /// will in fact be used. 
static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D) { + if (D->isInvalidDecl()) + return true; + if (isa(D)) { S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName(); return true; diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index 5f769cc40ded..1379440e8a03 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -7262,6 +7262,8 @@ public: while (TypoCorrection TC = State.Consumer->getNextCorrection()) { if (InitDecl && TC.getFoundDecl() == InitDecl) continue; + // FIXME: If we would typo-correct to an invalid declaration, it's + // probably best to just suppress all errors from this typo correction. ExprResult NE = State.RecoveryHandler ? State.RecoveryHandler(SemaRef, E, TC) : attemptRecovery(SemaRef, *State.Consumer, TC); diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp index 1c026d7adb36..33574b9aec35 100644 --- a/lib/Sema/SemaOverload.cpp +++ b/lib/Sema/SemaOverload.cpp @@ -604,7 +604,8 @@ clang::MakeDeductionFailureInfo(ASTContext &Context, Result.Data = Info.Param.getOpaqueValue(); break; - case Sema::TDK_DeducedMismatch: { + case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: { // FIXME: Should allocate from normal heap so that we can free this later. auto *Saved = new (Context) DFIDeducedMismatchArgs; Saved->FirstArg = Info.FirstArg; @@ -664,6 +665,7 @@ void DeductionFailureInfo::Destroy() { case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: // FIXME: Destroy the data? Data = nullptr; @@ -699,6 +701,7 @@ TemplateParameter DeductionFailureInfo::getTemplateParameter() { case Sema::TDK_TooFewArguments: case Sema::TDK_SubstitutionFailure: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: case Sema::TDK_CUDATargetMismatch: return TemplateParameter(); @@ -735,6 +738,7 @@ TemplateArgumentList *DeductionFailureInfo::getTemplateArgumentList() { return nullptr; case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: return static_cast(Data)->TemplateArgs; case Sema::TDK_SubstitutionFailure: @@ -764,6 +768,7 @@ const TemplateArgument *DeductionFailureInfo::getFirstArg() { case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: return &static_cast(Data)->FirstArg; @@ -791,6 +796,7 @@ const TemplateArgument *DeductionFailureInfo::getSecondArg() { case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: return &static_cast(Data)->SecondArg; @@ -803,11 +809,14 @@ const TemplateArgument *DeductionFailureInfo::getSecondArg() { } llvm::Optional DeductionFailureInfo::getCallArgIndex() { - if (static_cast(Result) == - Sema::TDK_DeducedMismatch) + switch (static_cast(Result)) { + case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: return static_cast(Data)->CallArgIndex; - return llvm::None; + default: + return llvm::None; + } } void OverloadCandidateSet::destroyCandidates() { @@ -9682,7 +9691,8 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated, return; } - case Sema::TDK_DeducedMismatch: { + case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: { // Format the template argument list into the argument string. 
SmallString<128> TemplateArgString; if (TemplateArgumentList *Args = @@ -9695,7 +9705,8 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated, S.Diag(Templated->getLocation(), diag::note_ovl_candidate_deduced_mismatch) << (*DeductionFailure.getCallArgIndex() + 1) << *DeductionFailure.getFirstArg() << *DeductionFailure.getSecondArg() - << TemplateArgString; + << TemplateArgString + << (DeductionFailure.Result == Sema::TDK_DeducedMismatchNested); break; } @@ -10012,6 +10023,7 @@ static unsigned RankDeductionFailure(const DeductionFailureInfo &DFI) { case Sema::TDK_SubstitutionFailure: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: case Sema::TDK_MiscellaneousDeductionFailure: case Sema::TDK_CUDATargetMismatch: diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp index c16b28bcf139..b79904c0a703 100644 --- a/lib/Sema/SemaTemplateDeduction.cpp +++ b/lib/Sema/SemaTemplateDeduction.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/TypeOrdering.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/Sema.h" #include "clang/Sema/Template.h" @@ -1899,8 +1900,9 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Check whether we have enough arguments. if (!hasTemplateArgumentForDeduction(Args, ArgIdx)) - return NumberOfArgumentsMustMatch ? Sema::TDK_TooFewArguments - : Sema::TDK_Success; + return NumberOfArgumentsMustMatch + ? Sema::TDK_MiscellaneousDeductionFailure + : Sema::TDK_Success; // C++1z [temp.deduct.type]p9: // During partial ordering, if Ai was originally a pack expansion [and] @@ -2214,25 +2216,26 @@ static Sema::TemplateDeductionResult ConvertDeducedTemplateArguments( if (!Deduced[I].isNull()) { if (I < NumAlreadyConverted) { - // We have already fully type-checked and converted this - // argument, because it was explicitly-specified. Just record the - // presence of this argument. - Builder.push_back(Deduced[I]); // We may have had explicitly-specified template arguments for a // template parameter pack (that may or may not have been extended // via additional deduced arguments). - if (Param->isParameterPack() && CurrentInstantiationScope) { - if (CurrentInstantiationScope->getPartiallySubstitutedPack() == - Param) { - // Forget the partially-substituted pack; its substitution is now - // complete. - CurrentInstantiationScope->ResetPartiallySubstitutedPack(); - } + if (Param->isParameterPack() && CurrentInstantiationScope && + CurrentInstantiationScope->getPartiallySubstitutedPack() == Param) { + // Forget the partially-substituted pack; its substitution is now + // complete. + CurrentInstantiationScope->ResetPartiallySubstitutedPack(); + // We still need to check the argument in case it was extended by + // deduction. + } else { + // We have already fully type-checked and converted this + // argument, because it was explicitly-specified. Just record the + // presence of this argument. + Builder.push_back(Deduced[I]); + continue; } - continue; } - // We have deduced this argument, so it still needs to be + // We may have deduced this argument, so it still needs to be // checked and converted. 
diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp
index c16b28bcf139..b79904c0a703 100644
--- a/lib/Sema/SemaTemplateDeduction.cpp
+++ b/lib/Sema/SemaTemplateDeduction.cpp
@@ -19,6 +19,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/AST/TypeOrdering.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/Template.h"
@@ -1899,8 +1900,9 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,

     // Check whether we have enough arguments.
     if (!hasTemplateArgumentForDeduction(Args, ArgIdx))
-      return NumberOfArgumentsMustMatch ? Sema::TDK_TooFewArguments
-                                        : Sema::TDK_Success;
+      return NumberOfArgumentsMustMatch
+                 ? Sema::TDK_MiscellaneousDeductionFailure
+                 : Sema::TDK_Success;

     // C++1z [temp.deduct.type]p9:
     //   During partial ordering, if Ai was originally a pack expansion [and]
@@ -2214,25 +2216,26 @@ static Sema::TemplateDeductionResult ConvertDeducedTemplateArguments(
     if (!Deduced[I].isNull()) {
       if (I < NumAlreadyConverted) {
-        // We have already fully type-checked and converted this
-        // argument, because it was explicitly-specified. Just record the
-        // presence of this argument.
-        Builder.push_back(Deduced[I]);

         // We may have had explicitly-specified template arguments for a
         // template parameter pack (that may or may not have been extended
         // via additional deduced arguments).
-        if (Param->isParameterPack() && CurrentInstantiationScope) {
-          if (CurrentInstantiationScope->getPartiallySubstitutedPack() ==
-              Param) {
-            // Forget the partially-substituted pack; its substitution is now
-            // complete.
-            CurrentInstantiationScope->ResetPartiallySubstitutedPack();
-          }
+        if (Param->isParameterPack() && CurrentInstantiationScope &&
+            CurrentInstantiationScope->getPartiallySubstitutedPack() == Param) {
+          // Forget the partially-substituted pack; its substitution is now
+          // complete.
+          CurrentInstantiationScope->ResetPartiallySubstitutedPack();
+          // We still need to check the argument in case it was extended by
+          // deduction.
+        } else {
+          // We have already fully type-checked and converted this
+          // argument, because it was explicitly-specified. Just record the
+          // presence of this argument.
+          Builder.push_back(Deduced[I]);
+          continue;
         }
-        continue;
       }

-      // We have deduced this argument, so it still needs to be
+      // We may have deduced this argument, so it still needs to be
       // checked and converted.
       if (ConvertDeducedTemplateArgument(S, Param, Deduced[I],
                                          Template, Info, IsDeduced, Builder)) {
@@ -2854,6 +2857,36 @@ CheckOriginalCallArgDeduction(Sema &S, Sema::OriginalCallArg OriginalArg,
   return true;
 }

+/// Find the pack index for a particular parameter index in an instantiation of
+/// a function template with specific arguments.
+///
+/// \return The pack index for whichever pack produced this parameter, or -1
+///         if this was not produced by a parameter. Intended to be used as the
+///         ArgumentPackSubstitutionIndex for further substitutions.
+// FIXME: We should track this in OriginalCallArgs so we don't need to
+// reconstruct it here.
+static unsigned getPackIndexForParam(Sema &S,
+                                     FunctionTemplateDecl *FunctionTemplate,
+                                     const MultiLevelTemplateArgumentList &Args,
+                                     unsigned ParamIdx) {
+  unsigned Idx = 0;
+  for (auto *PD : FunctionTemplate->getTemplatedDecl()->parameters()) {
+    if (PD->isParameterPack()) {
+      unsigned NumExpansions =
+          S.getNumArgumentsInExpansion(PD->getType(), Args).getValueOr(1);
+      if (Idx + NumExpansions > ParamIdx)
+        return ParamIdx - Idx;
+      Idx += NumExpansions;
+    } else {
+      if (Idx == ParamIdx)
+        return -1; // Not a pack expansion
+      ++Idx;
+    }
+  }
+
+  llvm_unreachable("parameter index would not be produced from template");
+}
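The helper above walks the declared parameters and maps a flattened parameter index in the instantiation back to an index within the pack that produced it. A standalone sketch of the same arithmetic, detached from Sema and with hypothetical names:

#include <vector>

// One entry per declared parameter: a fixed parameter occupies one slot; a
// pack expansion occupies 'Expansions' slots in the instantiated signature.
struct ParamShape { bool IsPack; unsigned Expansions; };

// Returns the index into the originating pack, or -1 for a non-pack parameter.
int packIndexForParam(const std::vector<ParamShape> &Params, unsigned ParamIdx) {
  unsigned Idx = 0;
  for (const ParamShape &P : Params) {
    if (P.IsPack) {
      if (Idx + P.Expansions > ParamIdx)
        return int(ParamIdx - Idx); // produced by this pack
      Idx += P.Expansions;
    } else {
      if (Idx == ParamIdx)
        return -1; // not a pack expansion
      ++Idx;
    }
  }
  return -1; // out of range; the real helper asserts instead
}

Note that the real function is declared as returning unsigned yet yields -1 for the non-pack case; the value is consumed as an int ArgumentPackSubstitutionIndex, where -1 conventionally means "no pack substitution".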
 /// \brief Finish template argument deduction for a function template,
 /// checking the deduced template arguments for completeness and forming
 /// the function template specialization.
@@ -2904,9 +2937,9 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
   DeclContext *Owner = FunctionTemplate->getDeclContext();
   if (FunctionTemplate->getFriendObjectKind())
     Owner = FunctionTemplate->getLexicalDeclContext();
+  MultiLevelTemplateArgumentList SubstArgs(*DeducedArgumentList);
   Specialization = cast_or_null<FunctionDecl>(
-      SubstDecl(FunctionTemplate->getTemplatedDecl(), Owner,
-                MultiLevelTemplateArgumentList(*DeducedArgumentList)));
+      SubstDecl(FunctionTemplate->getTemplatedDecl(), Owner, SubstArgs));
   if (!Specialization || Specialization->isInvalidDecl())
     return TDK_SubstitutionFailure;
@@ -2932,19 +2965,46 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
     //   In general, the deduction process attempts to find template argument
     //   values that will make the deduced A identical to A (after the type A
     //   is transformed as described above). [...]
+    llvm::SmallDenseMap<std::pair<unsigned, QualType>, QualType> DeducedATypes;
    for (unsigned I = 0, N = OriginalCallArgs->size(); I != N; ++I) {
      OriginalCallArg OriginalArg = (*OriginalCallArgs)[I];

-      unsigned ParamIdx = OriginalArg.ArgIdx;
+      auto ParamIdx = OriginalArg.ArgIdx;
       if (ParamIdx >= Specialization->getNumParams())
+        // FIXME: This presumably means a pack ended up smaller than we
+        // expected while deducing. Should this not result in deduction
+        // failure? Can it even happen?
         continue;

-      QualType DeducedA = Specialization->getParamDecl(ParamIdx)->getType();
+      QualType DeducedA;
+      if (!OriginalArg.DecomposedParam) {
+        // P is one of the function parameters, just look up its substituted
+        // type.
+        DeducedA = Specialization->getParamDecl(ParamIdx)->getType();
+      } else {
+        // P is a decomposed element of a parameter corresponding to a
+        // braced-init-list argument. Substitute back into P to find the
+        // deduced A.
+        QualType &CacheEntry =
+            DeducedATypes[{ParamIdx, OriginalArg.OriginalParamType}];
+        if (CacheEntry.isNull()) {
+          ArgumentPackSubstitutionIndexRAII PackIndex(
+              *this, getPackIndexForParam(*this, FunctionTemplate, SubstArgs,
+                                          ParamIdx));
+          CacheEntry =
+              SubstType(OriginalArg.OriginalParamType, SubstArgs,
+                        Specialization->getTypeSpecStartLoc(),
+                        Specialization->getDeclName());
+        }
+        DeducedA = CacheEntry;
+      }
+
       if (CheckOriginalCallArgDeduction(*this, OriginalArg, DeducedA)) {
         Info.FirstArg = TemplateArgument(DeducedA);
         Info.SecondArg = TemplateArgument(OriginalArg.OriginalArgType);
         Info.CallArgIndex = OriginalArg.ArgIdx;
-        return TDK_DeducedMismatch;
+        return OriginalArg.DecomposedParam ? TDK_DeducedMismatchNested
+                                           : TDK_DeducedMismatch;
       }
     }
   }
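One plausible call shape that exercises the decomposed path above — each braced list is matched elementwise, and re-substituting the element type P0 for a later list needs the pack index recovered by getPackIndexForParam (a sketch, not from the patch; the index values are an assumption about how the pack is expanded):

#include <initializer_list>

template <typename... T> void h(std::initializer_list<T>... Lists);

void use() {
  // Two decomposed arguments; checking the deduced A for the second list's
  // elements re-substitutes P0 with the pack index of that expansion (here 1).
  h({1, 2}, {3.0, 4.0}); // deduces T = <int, double>
}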
@@ -3196,19 +3256,21 @@ static bool hasDeducibleTemplateParameters(Sema &S,
                                            FunctionTemplateDecl *FunctionTemplate,
                                            QualType T);

-static Sema::TemplateDeductionResult DeduceTemplateArgumentByListElement(
+static Sema::TemplateDeductionResult DeduceTemplateArgumentsFromCallArgument(
     Sema &S, TemplateParameterList *TemplateParams, QualType ParamType,
     Expr *Arg, TemplateDeductionInfo &Info,
-    SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF);
+    SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+    SmallVectorImpl<Sema::OriginalCallArg> &OriginalCallArgs,
+    bool DecomposedParam, unsigned ArgIdx, unsigned TDF);

 /// \brief Attempt template argument deduction from an initializer list
 ///        deemed to be an argument in a function call.
-static Sema::TemplateDeductionResult
-DeduceFromInitializerList(Sema &S, TemplateParameterList *TemplateParams,
-                          QualType AdjustedParamType, InitListExpr *ILE,
-                          TemplateDeductionInfo &Info,
-                          SmallVectorImpl<DeducedTemplateArgument> &Deduced,
-                          unsigned TDF) {
+static Sema::TemplateDeductionResult DeduceFromInitializerList(
+    Sema &S, TemplateParameterList *TemplateParams, QualType AdjustedParamType,
+    InitListExpr *ILE, TemplateDeductionInfo &Info,
+    SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+    SmallVectorImpl<Sema::OriginalCallArg> &OriginalCallArgs, unsigned ArgIdx,
+    unsigned TDF) {
   // C++ [temp.deduct.call]p1: (CWG 1591)
   //   If removing references and cv-qualifiers from P gives
   //   std::initializer_list<P0> or P0[N] for some P0 and N and the argument is
   //   a non-empty initializer list, then deduction is performed instead for
   //   each element of the initializer list, taking P0 as a function template
   //   parameter type and the initializer element as its argument
   //
-  // FIXME: Remove references and cv-qualifiers here? Consider
-  //        std::initializer_list<std::initializer_list<int>&&>
+  // We've already removed references and cv-qualifiers here.
+  if (!ILE->getNumInits())
+    return Sema::TDK_Success;
+
   QualType ElTy;
   auto *ArrTy = S.Context.getAsArrayType(AdjustedParamType);
   if (ArrTy)
@@ -3231,15 +3295,15 @@ DeduceFromInitializerList(Sema &S, TemplateParameterList *TemplateParams,
   // Deduction only needs to be done for dependent types.
   if (ElTy->isDependentType()) {
     for (Expr *E : ILE->inits()) {
-      if (auto Result = DeduceTemplateArgumentByListElement(
-              S, TemplateParams, ElTy, E, Info, Deduced, TDF))
+      if (auto Result = DeduceTemplateArgumentsFromCallArgument(
+              S, TemplateParams, ElTy, E, Info, Deduced, OriginalCallArgs, true,
+              ArgIdx, TDF))
         return Result;
     }
   }

   //   in the P0[N] case, if N is a non-type template parameter, N is deduced
   //   from the length of the initializer list.
-  // FIXME: We're not supposed to get here if N would be deduced as 0.
   if (auto *DependentArrTy = dyn_cast_or_null<DependentSizedArrayType>(ArrTy)) {
     // Determine whether the array bound is something we can deduce.
     if (NonTypeTemplateParmDecl *NTTP =
@@ -3258,30 +3322,35 @@ DeduceFromInitializerList(Sema &S, TemplateParameterList *TemplateParams,
   return Sema::TDK_Success;
 }

-/// \brief Perform template argument deduction by matching a parameter type
-///        against a single expression, where the expression is an element of
-///        an initializer list that was originally matched against a parameter
-///        of type \c initializer_list\<T\>.
-static Sema::TemplateDeductionResult
-DeduceTemplateArgumentByListElement(Sema &S,
-                                    TemplateParameterList *TemplateParams,
-                                    QualType ParamType, Expr *Arg,
-                                    TemplateDeductionInfo &Info,
-                                    SmallVectorImpl<DeducedTemplateArgument> &Deduced,
-                                    unsigned TDF) {
-  // Handle the case where an init list contains another init list as the
-  // element.
-  if (InitListExpr *ILE = dyn_cast<InitListExpr>(Arg))
-    return DeduceFromInitializerList(S, TemplateParams,
-                                     ParamType.getNonReferenceType(), ILE, Info,
-                                     Deduced, TDF);
-
-  // For all other cases, just match by type.
+/// \brief Perform template argument deduction per [temp.deduct.call] for a
+///        single parameter / argument pair.
+static Sema::TemplateDeductionResult DeduceTemplateArgumentsFromCallArgument(
+    Sema &S, TemplateParameterList *TemplateParams, QualType ParamType,
+    Expr *Arg, TemplateDeductionInfo &Info,
+    SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+    SmallVectorImpl<Sema::OriginalCallArg> &OriginalCallArgs,
+    bool DecomposedParam, unsigned ArgIdx, unsigned TDF) {
   QualType ArgType = Arg->getType();
+  QualType OrigParamType = ParamType;
+
+  //   If P is a reference type [...]
+  //   If P is a cv-qualified type [...]
   if (AdjustFunctionParmAndArgTypesForDeduction(S, TemplateParams, ParamType,
                                                 ArgType, Arg, TDF))
     return Sema::TDK_Success;

+  //   If [...] the argument is a non-empty initializer list [...]
+  if (InitListExpr *ILE = dyn_cast<InitListExpr>(Arg))
+    return DeduceFromInitializerList(S, TemplateParams, ParamType, ILE, Info,
+                                     Deduced, OriginalCallArgs, ArgIdx, TDF);
+
+  //   [...] the deduction process attempts to find template argument values
+  //   that will make the deduced A identical to A
+  //
+  // Keep track of the argument type and corresponding parameter index,
+  // so we can check for compatibility between the deduced A and A.
+  OriginalCallArgs.push_back(
+      Sema::OriginalCallArg(OrigParamType, DecomposedParam, ArgIdx, ArgType));
   return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, ParamType,
                                             ArgType, Info, Deduced, TDF);
 }
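Under CWG 1591, both forms handled by the rewritten function deduce from a braced argument. A sketch of calls that should now be accepted (assuming a conforming std::initializer_list):

#include <initializer_list>

template <typename T, unsigned N> void arr(const T (&)[N]); // the P0[N] case
template <typename T> void il(std::initializer_list<T>);    // the list case

void use() {
  arr({1, 2, 3}); // T = int, N = 3: the bound is deduced from the list length
  il({4, 5, 6});  // T = int: deduced from each element in turn
}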
@@ -3364,31 +3433,17 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(

   // Deduce an argument of type ParamType from an expression with index ArgIdx.
   auto DeduceCallArgument = [&](QualType ParamType, unsigned ArgIdx) {
-    Expr *Arg = Args[ArgIdx];
-    QualType ArgType = Arg->getType();
-    QualType OrigParamType = ParamType;
-
-    unsigned TDF = 0;
-    if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams,
-                                                  ParamType, ArgType, Arg,
-                                                  TDF))
-      return Sema::TDK_Success;
-
-    // If we have nothing to deduce, we're done.
+    // C++ [temp.deduct.call]p1: (DR1391)
+    //   Template argument deduction is done by comparing each function template
+    //   parameter that contains template-parameters that participate in
+    //   template argument deduction ...
     if (!hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType))
       return Sema::TDK_Success;

-    // If the argument is an initializer list ...
-    if (InitListExpr *ILE = dyn_cast<InitListExpr>(Arg))
-      return DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE,
-                                       Info, Deduced, TDF);
-
-    // Keep track of the argument type and corresponding parameter index,
-    // so we can check for compatibility between the deduced A and A.
-    OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx, ArgType));
-
-    return DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, ParamType,
-                                              ArgType, Info, Deduced, TDF);
+    //   ... with the type of the corresponding argument
+    return DeduceTemplateArgumentsFromCallArgument(
+        *this, TemplateParams, ParamType, Args[ArgIdx], Info, Deduced,
+        OriginalCallArgs, /*Decomposed*/false, ArgIdx, /*TDF*/ 0);
   };

   // Deduce template arguments from the function parameters.
@@ -4054,8 +4109,6 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,
   // Deduce type of TemplParam in Func(Init)
   SmallVector<DeducedTemplateArgument, 1> Deduced;
   Deduced.resize(1);
-  QualType InitType = Init->getType();
-  unsigned TDF = 0;

   TemplateDeductionInfo Info(Loc, Depth);
@@ -4070,12 +4123,21 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,
     return DAR_Failed;
   };

+  SmallVector<OriginalCallArg, 4> OriginalCallArgs;
+
   InitListExpr *InitList = dyn_cast<InitListExpr>(Init);
   if (InitList) {
+    // Notionally, we substitute std::initializer_list<T> for 'auto' and deduce
+    // against that. Such deduction only succeeds if removing cv-qualifiers and
+    // references results in std::initializer_list<T>.
+    if (!Type.getType().getNonReferenceType()->getAs<AutoType>())
+      return DAR_Failed;
+
     for (unsigned i = 0, e = InitList->getNumInits(); i < e; ++i) {
-      if (DeduceTemplateArgumentByListElement(*this, TemplateParamsSt.get(),
-                                              TemplArg, InitList->getInit(i),
-                                              Info, Deduced, TDF))
+      if (DeduceTemplateArgumentsFromCallArgument(
+              *this, TemplateParamsSt.get(), TemplArg, InitList->getInit(i),
+              Info, Deduced, OriginalCallArgs, /*Decomposed*/ true,
+              /*ArgIdx*/ 0, /*TDF*/ 0))
         return DeductionFailed();
     }
   } else {
@@ -4084,13 +4146,9 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,
       return DAR_FailedAlreadyDiagnosed;
     }

-    if (AdjustFunctionParmAndArgTypesForDeduction(
-            *this, TemplateParamsSt.get(), FuncParam, InitType, Init, TDF))
-      return DAR_Failed;
-
-    if (DeduceTemplateArgumentsByTypeMatch(*this, TemplateParamsSt.get(),
-                                           FuncParam, InitType, Info, Deduced,
-                                           TDF))
+    if (DeduceTemplateArgumentsFromCallArgument(
+            *this, TemplateParamsSt.get(), FuncParam, Init, Info, Deduced,
+            OriginalCallArgs, /*Decomposed*/ false, /*ArgIdx*/ 0, /*TDF*/ 0))
      return DeductionFailed();
   }
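With auto deduction funnelled through the same helper, the observable behaviour at the source level looks like this (a sketch of the accepted and rejected forms):

#include <initializer_list>

void examples() {
  auto a = {1, 2, 3};   // std::initializer_list<int>
  auto &&r = {4, 5};    // OK: references/cv are stripped before the check
  // auto b = {1, 2.0}; // error: conflicting deductions for the element type
}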
@@ -4112,12 +4170,14 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,

   // Check that the deduced argument type is compatible with the original
   // argument type per C++ [temp.deduct.call]p4.
-  if (!InitList && !Result.isNull() &&
-      CheckOriginalCallArgDeduction(*this,
-                                    Sema::OriginalCallArg(FuncParam,0,InitType),
-                                    Result)) {
-    Result = QualType();
-    return DeductionFailed();
+  QualType DeducedA = InitList ? Deduced[0].getAsType() : Result;
+  for (const OriginalCallArg &OriginalArg : OriginalCallArgs) {
+    assert((bool)InitList == OriginalArg.DecomposedParam &&
+           "decomposed non-init-list in auto deduction?");
+    if (CheckOriginalCallArgDeduction(*this, OriginalArg, DeducedA)) {
+      Result = QualType();
+      return DeductionFailed();
+    }
   }

   return DAR_Succeeded;
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 7328dcb8760f..f4013b820641 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1470,8 +1470,11 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
                              TSK_ImplicitInstantiation,
                              /*Complain=*/true);

-    SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs,
-                                    TSK_ImplicitInstantiation);
+    // For nested local classes, we will instantiate the members when we
+    // reach the end of the outermost (non-nested) local class.
+    if (!D->isCXXClassMember())
+      SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs,
+                                      TSK_ImplicitInstantiation);

     // This class may have local implicit instantiations that need to be
     // performed within this scope.
@@ -3616,6 +3619,27 @@ TemplateDeclInstantiator::InitMethodInstantiation(CXXMethodDecl *New,
   return false;
 }

+/// In the MS ABI, we need to instantiate default arguments of dllexported
+/// default constructors along with the constructor definition. This allows IR
+/// gen to emit a constructor closure which calls the default constructor with
+/// its default arguments.
+static void InstantiateDefaultCtorDefaultArgs(Sema &S,
+                                              CXXConstructorDecl *Ctor) {
+  assert(S.Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+         Ctor->isDefaultConstructor());
+  unsigned NumParams = Ctor->getNumParams();
+  if (NumParams == 0)
+    return;
+  DLLExportAttr *Attr = Ctor->getAttr<DLLExportAttr>();
+  if (!Attr)
+    return;
+  for (unsigned I = 0; I != NumParams; ++I) {
+    (void)S.CheckCXXDefaultArgExpr(Attr->getLocation(), Ctor,
+                                   Ctor->getParamDecl(I));
+    S.DiscardCleanupsInEvaluationContext();
+  }
+}
+
 /// \brief Instantiate the definition of the given function from its
 /// template.
 ///
@@ -3793,11 +3817,17 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
                                TemplateArgs))
       return;

-    // If this is a constructor, instantiate the member initializers.
-    if (const CXXConstructorDecl *Ctor =
-          dyn_cast<CXXConstructorDecl>(PatternDecl)) {
-      InstantiateMemInitializers(cast<CXXConstructorDecl>(Function), Ctor,
+    if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) {
+      // If this is a constructor, instantiate the member initializers.
+      InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl),
                                  TemplateArgs);
+
+      // If this is an MS ABI dllexport default constructor, instantiate any
+      // default arguments.
+      if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+          Ctor->isDefaultConstructor()) {
+        InstantiateDefaultCtorDefaultArgs(*this, Ctor);
+      }
     }

     // Instantiate the function body.
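The kind of code the new InstantiateDefaultCtorDefaultArgs helper exists for, as a sketch (MS ABI only, so it needs clang-cl or -fms-extensions to compile):

// Exporting the instantiation forces the default argument to be instantiated
// together with the constructor, so IR gen can emit the constructor closure
// that calls S<int>::S(42) on behalf of importing code.
template <typename T> struct S {
  S(int x = 42) {}
};
template struct __declspec(dllexport) S<int>;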
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index fe2c53b77e1d..7f890051e641 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -8890,44 +8890,26 @@ void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) {
   }
 }

-ASTReader::ASTReader(
-    Preprocessor &PP, ASTContext &Context,
-    const PCHContainerReader &PCHContainerRdr,
-    ArrayRef<IntrusiveRefCntPtr<ModuleFileExtension>> Extensions,
-    StringRef isysroot, bool DisableValidation,
-    bool AllowASTWithCompilerErrors,
-    bool AllowConfigurationMismatch, bool ValidateSystemInputs,
-    bool UseGlobalIndex,
-    std::unique_ptr<llvm::Timer> ReadTimer)
-    : Listener(DisableValidation ?
-        cast<ASTReaderListener>(new SimpleASTReaderListener(PP)) :
-        cast<ASTReaderListener>(new PCHValidator(PP, *this))),
-      DeserializationListener(nullptr),
-      OwnsDeserializationListener(false), SourceMgr(PP.getSourceManager()),
-      FileMgr(PP.getFileManager()), PCHContainerRdr(PCHContainerRdr),
-      Diags(PP.getDiagnostics()), SemaObj(nullptr), PP(PP), Context(Context),
-      Consumer(nullptr), ModuleMgr(PP.getFileManager(), PCHContainerRdr),
-      DummyIdResolver(PP),
-      ReadTimer(std::move(ReadTimer)),
-      PragmaMSStructState(-1),
-      PragmaMSPointersToMembersState(-1),
-      isysroot(isysroot), DisableValidation(DisableValidation),
+ASTReader::ASTReader(Preprocessor &PP, ASTContext &Context,
+                     const PCHContainerReader &PCHContainerRdr,
+                     ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
+                     StringRef isysroot, bool DisableValidation,
+                     bool AllowASTWithCompilerErrors,
+                     bool AllowConfigurationMismatch, bool ValidateSystemInputs,
+                     bool UseGlobalIndex,
+                     std::unique_ptr<llvm::Timer> ReadTimer)
+    : Listener(DisableValidation
+                   ? cast<ASTReaderListener>(new SimpleASTReaderListener(PP))
+                   : cast<ASTReaderListener>(new PCHValidator(PP, *this))),
+      SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()),
+      PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP),
+      Context(Context), ModuleMgr(PP.getFileManager(), PCHContainerRdr),
+      DummyIdResolver(PP), ReadTimer(std::move(ReadTimer)), isysroot(isysroot),
+      DisableValidation(DisableValidation),
       AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
       AllowConfigurationMismatch(AllowConfigurationMismatch),
       ValidateSystemInputs(ValidateSystemInputs),
-      UseGlobalIndex(UseGlobalIndex), TriedLoadingGlobalIndex(false),
-      ProcessingUpdateRecords(false),
-      CurrSwitchCaseStmts(&SwitchCaseStmts), NumSLocEntriesRead(0),
-      TotalNumSLocEntries(0), NumStatementsRead(0), TotalNumStatements(0),
-      NumMacrosRead(0), TotalNumMacros(0), NumIdentifierLookups(0),
-      NumIdentifierLookupHits(0), NumSelectorsRead(0),
-      NumMethodPoolEntriesRead(0), NumMethodPoolLookups(0),
-      NumMethodPoolHits(0), NumMethodPoolTableLookups(0),
-      NumMethodPoolTableHits(0), TotalNumMethodPoolEntries(0),
-      NumLexicalDeclContextsRead(0), TotalLexicalDeclContexts(0),
-      NumVisibleDeclContextsRead(0), TotalVisibleDeclContexts(0),
-      TotalModulesSizeInBits(0), NumCurrentElementsDeserializing(0),
-      PassingDeclsToConsumer(false), ReadingKind(Read_None) {
+      UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
   SourceMgr.setExternalSLocEntrySource(this);

   for (const auto &Ext : Extensions) {
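The constructor shrinks because the flag and counter members now carry in-class initializers (the matching header change is visible in the diffstat's ASTReader.h). The general shape of that cleanup, as a self-contained sketch with hypothetical names:

// Before: every counter spelled out in the constructor's init list.
struct Counters {
  Counters() : NumRead(0), NumTotal(0), Reading(false) {}
  unsigned NumRead, NumTotal;
  bool Reading;
};

// After: default member initializers; a constructor only needs to wire up
// whatever actually depends on its arguments.
struct CountersNSDMI {
  unsigned NumRead = 0, NumTotal = 0;
  bool Reading = false;
};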
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index 6d79ea53b659..2a5eda436f09 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -800,17 +800,17 @@ void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) {
 void ASTWriter::WriteTypeAbbrevs() {
   using namespace llvm;

-  BitCodeAbbrev *Abv;
+  std::shared_ptr<BitCodeAbbrev> Abv;

   // Abbreviation for TYPE_EXT_QUAL
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::TYPE_EXT_QUAL));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 3)); // Quals
-  TypeExtQualAbbrev = Stream.EmitAbbrev(Abv);
+  TypeExtQualAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for TYPE_FUNCTION_PROTO
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::TYPE_FUNCTION_PROTO));
   // FunctionType
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ReturnType
@@ -828,7 +828,7 @@ void ASTWriter::WriteTypeAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumParams
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Params
-  TypeFunctionProtoAbbrev = Stream.EmitAbbrev(Abv);
+  TypeFunctionProtoAbbrev = Stream.EmitAbbrev(std::move(Abv));
 }

 //===----------------------------------------------------------------------===//
@@ -1323,7 +1323,7 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP,
   RecordData Record;

   // Metadata
-  auto *MetadataAbbrev = new BitCodeAbbrev();
+  auto MetadataAbbrev = std::make_shared<BitCodeAbbrev>();
   MetadataAbbrev->Add(BitCodeAbbrevOp(METADATA));
   MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Major
   MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Minor
@@ -1333,7 +1333,7 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP,
   MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Timestamps
   MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Errors
   MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // SVN branch/tag
-  unsigned MetadataAbbrevCode = Stream.EmitAbbrev(MetadataAbbrev);
+  unsigned MetadataAbbrevCode = Stream.EmitAbbrev(std::move(MetadataAbbrev));
   assert((!WritingModule || isysroot.empty()) &&
          "writing module as a relocatable PCH?");
   {
@@ -1356,10 +1356,10 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP,
   }

   // Module name
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(MODULE_NAME));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev);
+  unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
   RecordData::value_type Record[] = {MODULE_NAME};
   Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name);
 }
@@ -1376,10 +1376,10 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP,
           .ModuleMapFileHomeIsCwd ||
       WritingModule->Directory->getName() != StringRef(".")) {
     // Module directory.
-    auto *Abbrev = new BitCodeAbbrev();
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory
-    unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev);
+    unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));

     RecordData::value_type Record[] = {MODULE_DIRECTORY};
     Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir);
@@ -1586,11 +1586,11 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP,
   // Original file name and file ID
   SourceManager &SM = Context.getSourceManager();
   if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
-    auto *FileAbbrev = new BitCodeAbbrev();
+    auto FileAbbrev = std::make_shared<BitCodeAbbrev>();
     FileAbbrev->Add(BitCodeAbbrevOp(ORIGINAL_FILE));
     FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File ID
     FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
-    unsigned FileAbbrevCode = Stream.EmitAbbrev(FileAbbrev);
+    unsigned FileAbbrevCode = Stream.EmitAbbrev(std::move(FileAbbrev));

     Record.clear();
     Record.push_back(ORIGINAL_FILE);
@@ -1604,10 +1604,10 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP,

   // Original PCH directory
   if (!OutputFile.empty() && OutputFile != "-") {
-    auto *Abbrev = new BitCodeAbbrev();
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(ORIGINAL_PCH_DIR));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
-    unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev);
+    unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));

     SmallString<128> OutputPath(OutputFile);
@@ -1644,7 +1644,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
   Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);

   // Create input-file abbreviation.
-  auto *IFAbbrev = new BitCodeAbbrev();
+  auto IFAbbrev = std::make_shared<BitCodeAbbrev>();
   IFAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE));
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size
@@ -1652,7 +1652,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
-  unsigned IFAbbrevCode = Stream.EmitAbbrev(IFAbbrev);
+  unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));

   // Get all ContentCache objects for files, sorted by whether the file is a
   // system one or not. System files go at the back, users files at the front.
@@ -1712,13 +1712,13 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
   Stream.ExitBlock();

   // Create input file offsets abbreviation.
-  auto *OffsetsAbbrev = new BitCodeAbbrev();
+  auto OffsetsAbbrev = std::make_shared<BitCodeAbbrev>();
   OffsetsAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_OFFSETS));
   OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # input files
   OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # non-system
                                                                 //  input files
   OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));   // Array
-  unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(OffsetsAbbrev);
+  unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(std::move(OffsetsAbbrev));
   // Write input file offsets.
   RecordData::value_type Record[] = {INPUT_FILE_OFFSETS,
@@ -1735,7 +1735,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
 static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_FILE_ENTRY));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
@@ -1746,7 +1746,7 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumCreatedFIDs
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 24)); // FirstDeclIndex
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumDecls
-  return Stream.EmitAbbrev(Abbrev);
+  return Stream.EmitAbbrev(std::move(Abbrev));
 }

 /// \brief Create an abbreviation for the SLocEntry that refers to a
@@ -1754,14 +1754,14 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
 static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) {
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_ENTRY));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Characteristic
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Buffer name blob
-  return Stream.EmitAbbrev(Abbrev);
+  return Stream.EmitAbbrev(std::move(Abbrev));
 }

 /// \brief Create an abbreviation for the SLocEntry that refers to a
@@ -1770,13 +1770,13 @@ static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
                                            bool Compressed) {
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
                                          : SM_SLOC_BUFFER_BLOB));
   if (Compressed)
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
-  return Stream.EmitAbbrev(Abbrev);
+  return Stream.EmitAbbrev(std::move(Abbrev));
 }

 /// \brief Create an abbreviation for the SLocEntry that refers to a macro
@@ -1784,14 +1784,14 @@ static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
 static unsigned CreateSLocExpansionAbbrev(llvm::BitstreamWriter &Stream) {
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_EXPANSION_ENTRY));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Spelling location
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Start location
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // End location
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Token length
-  return Stream.EmitAbbrev(Abbrev);
+  return Stream.EmitAbbrev(std::move(Abbrev));
 }

 namespace {
@@ -1966,13 +1966,13 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {

   // Create a blob abbreviation
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned TableAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned TableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   // Write the header search table
   RecordData::value_type Record[] = {HEADER_SEARCH_TABLE, BucketOffset,
@@ -2136,12 +2136,12 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
   // table is used for lazily loading source-location information.
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets
-  unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   {
     RecordData::value_type Record[] = {
         SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(),
@@ -2391,13 +2391,13 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {

   // Write the offsets table for macro IDs.
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   {
     RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(),
                                        FirstMacroID - NUM_PREDEF_MACRO_IDS};
@@ -2421,14 +2421,14 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) {
   // Set up the abbreviation for
   unsigned InclusionAbbrev = 0;
   {
-    auto *Abbrev = new BitCodeAbbrev();
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(PPD_INCLUSION_DIRECTIVE));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // filename length
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // in quotes
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // kind
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // imported module
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-    InclusionAbbrev = Stream.EmitAbbrev(Abbrev);
+    InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   }

   unsigned FirstPreprocessorEntityID
@@ -2491,11 +2491,11 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) {

   // Write the offsets table for identifier IDs.
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(PPD_ENTITIES_OFFSETS));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first pp entity
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS,
                                      FirstPreprocessorEntityID -
@@ -2549,7 +2549,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {

   // Write the abbreviations needed for the submodules block.
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent
@@ -2562,70 +2562,70 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExportWild...
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ConfigMacrosExh...
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned DefinitionAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned DefinitionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_HEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned UmbrellaAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned UmbrellaAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_HEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned HeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned HeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TOPHEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned TopHeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned TopHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_DIR));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_REQUIRES));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // State
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Feature
-  unsigned RequiresAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned RequiresAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_EXCLUDED_HEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TEXTUAL_HEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_HEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_TEXTUAL_HEADER));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_LINK_LIBRARY));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
-  unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFIG_MACRO));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name
-  unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFLICT));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Other module
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Message
-  unsigned ConflictAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned ConflictAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   // Write the submodule metadata block.
   RecordData::value_type Record[] = {getNumberOfModules(WritingModule),
@@ -2891,12 +2891,12 @@ void ASTWriter::WriteTypeDeclOffsets() {
   using namespace llvm;

   // Write the type offsets array
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block
-  unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   {
     RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(),
                                        FirstTypeID - NUM_PREDEF_TYPE_IDS};
@@ -2904,12 +2904,12 @@ void ASTWriter::WriteTypeDeclOffsets() {
   }

   // Write the declaration offsets array
-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(DECL_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of declarations
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base decl ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // declarations block
-  unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   {
     RecordData::value_type Record[] = {DECL_OFFSET, DeclOffsets.size(),
                                        FirstDeclID - NUM_PREDEF_DECL_IDS};
@@ -2934,11 +2934,11 @@ void ASTWriter::WriteFileDeclIDsMap() {
     FileGroupedDeclIDs.push_back(LocDeclEntry.second);
   }

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(FILE_SORTED_DECLS));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev);
+  unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
   RecordData::value_type Record[] = {FILE_SORTED_DECLS,
                                      FileGroupedDeclIDs.size()};
   Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
@@ -3142,12 +3142,12 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) {
     }

     // Create a blob abbreviation
-    auto *Abbrev = new BitCodeAbbrev();
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(METHOD_POOL));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-    unsigned MethodPoolAbbrev = Stream.EmitAbbrev(Abbrev);
+    unsigned MethodPoolAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

     // Write the method pool
     {
@@ -3157,12 +3157,12 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) {
     }

     // Create a blob abbreviation for the selector table offsets.
-    Abbrev = new BitCodeAbbrev();
+    Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(SELECTOR_OFFSETS));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-    unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+    unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

     // Write the selector offsets table.
     {
@@ -3452,11 +3452,11 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
     }

     // Create a blob abbreviation
-    auto *Abbrev = new BitCodeAbbrev();
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_TABLE));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-    unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
+    unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

     // Write the identifier table
     RecordData::value_type Record[] = {IDENTIFIER_TABLE, BucketOffset};
@@ -3464,12 +3464,12 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
   }

   // Write the offsets table for identifier IDs.
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

 #ifndef NDEBUG
   for (unsigned I = 0, N = IdentifierOffsets.size(); I != N; ++I)
@@ -4025,11 +4025,11 @@ void ASTWriter::WriteObjCCategories() {

   // Emit the categories map.
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(OBJC_CATEGORIES_MAP));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of entries
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned AbbrevID = Stream.EmitAbbrev(Abbrev);
+  unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbrev));

   RecordData::value_type Record[] = {OBJC_CATEGORIES_MAP, CategoriesMap.size()};
   Stream.EmitRecordWithBlob(AbbrevID, Record,
@@ -4091,14 +4091,14 @@ void ASTWriter::WriteModuleFileExtension(Sema &SemaRef,
   Stream.EnterSubblock(EXTENSION_BLOCK_ID, 4);

   // Emit the metadata record abbreviation.
-  auto *Abv = new llvm::BitCodeAbbrev();
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(EXTENSION_METADATA));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
-  unsigned Abbrev = Stream.EmitAbbrev(Abv);
+  unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));

   // Emit the metadata record.
   RecordData Record;
@@ -4221,29 +4221,10 @@ void ASTWriter::SetSelectorOffset(Selector Sel, uint32_t Offset) {
   SelectorOffsets[ID - FirstSelectorID] = Offset;
 }

-ASTWriter::ASTWriter(
-    llvm::BitstreamWriter &Stream,
-    ArrayRef<llvm::IntrusiveRefCntPtr<ModuleFileExtension>> Extensions,
-    bool IncludeTimestamps)
-    : Stream(Stream), Context(nullptr), PP(nullptr), Chain(nullptr),
-      WritingModule(nullptr), IncludeTimestamps(IncludeTimestamps),
-      WritingAST(false), DoneWritingDeclsAndTypes(false),
-      ASTHasCompilerErrors(false), FirstDeclID(NUM_PREDEF_DECL_IDS),
-      NextDeclID(FirstDeclID), FirstTypeID(NUM_PREDEF_TYPE_IDS),
-      NextTypeID(FirstTypeID), FirstIdentID(NUM_PREDEF_IDENT_IDS),
-      NextIdentID(FirstIdentID), FirstMacroID(NUM_PREDEF_MACRO_IDS),
-      NextMacroID(FirstMacroID), FirstSubmoduleID(NUM_PREDEF_SUBMODULE_IDS),
-      NextSubmoduleID(FirstSubmoduleID),
-      FirstSelectorID(NUM_PREDEF_SELECTOR_IDS), NextSelectorID(FirstSelectorID),
-      NumStatements(0), NumMacros(0),
-      NumLexicalDeclContexts(0), NumVisibleDeclContexts(0),
-      TypeExtQualAbbrev(0), TypeFunctionProtoAbbrev(0), DeclParmVarAbbrev(0),
-      DeclContextLexicalAbbrev(0), DeclContextVisibleLookupAbbrev(0),
-      UpdateVisibleAbbrev(0), DeclRecordAbbrev(0), DeclTypedefAbbrev(0),
-      DeclVarAbbrev(0), DeclFieldAbbrev(0), DeclEnumAbbrev(0),
-      DeclObjCIvarAbbrev(0), DeclCXXMethodAbbrev(0), DeclRefExprAbbrev(0),
-      CharacterLiteralAbbrev(0), IntegerLiteralAbbrev(0),
-      ExprImplicitCastAbbrev(0) {
+ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream,
+                     ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
+                     bool IncludeTimestamps)
+    : Stream(Stream), IncludeTimestamps(IncludeTimestamps) {
   for (const auto &Ext : Extensions) {
     if (auto Writer = Ext->createExtensionWriter(*this))
       ModuleFileExtensionWriters.push_back(std::move(Writer));
@@ -4474,10 +4455,10 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
     }
   }

-  auto *Abv = new llvm::BitCodeAbbrev();
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
-  unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(Abv);
+  unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv));
   {
     RecordData::value_type Record[] = {TU_UPDATE_LEXICAL};
     Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record,
@@ -4485,11 +4466,11 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   }

   // And a visible updates block for the translation unit.
-  Abv = new llvm::BitCodeAbbrev();
+  Abv = std::make_shared<llvm::BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
-  UpdateVisibleAbbrev = Stream.EmitAbbrev(Abv);
+  UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv));
   WriteDeclContextVisibleUpdate(TU);

   // If we have any extern "C" names, write out a visible update for them.
@@ -4584,10 +4565,10 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   //     c++-base-specifiers-id:i32
   //     type-id:i32)
   //
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   SmallString<2048> Buffer;
   {
     llvm::raw_svector_ostream Out(Buffer);
diff --git a/lib/Serialization/ASTWriterDecl.cpp b/lib/Serialization/ASTWriterDecl.cpp
index ee220f00a81f..8e1480739a5f 100644
--- a/lib/Serialization/ASTWriterDecl.cpp
+++ b/lib/Serialization/ASTWriterDecl.cpp
@@ -1702,10 +1702,10 @@ void ASTDeclWriter::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
 void ASTWriter::WriteDeclAbbrevs() {
   using namespace llvm;

-  BitCodeAbbrev *Abv;
+  std::shared_ptr<BitCodeAbbrev> Abv;

   // Abbreviation for DECL_FIELD
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_FIELD));
   // Decl
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext
@@ -1735,10 +1735,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc
-  DeclFieldAbbrev = Stream.EmitAbbrev(Abv);
+  DeclFieldAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_OBJC_IVAR
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_OBJC_IVAR));
   // Decl
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext
@@ -1771,10 +1771,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc
-  DeclObjCIvarAbbrev = Stream.EmitAbbrev(Abv);
+  DeclObjCIvarAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_ENUM
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_ENUM));
   // Redeclarable
   Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration
@@ -1820,10 +1820,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   // DC
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LexicalOffset
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // VisibleOffset
-  DeclEnumAbbrev = Stream.EmitAbbrev(Abv);
+  DeclEnumAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_RECORD
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_RECORD));
   // Redeclarable
   Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration
@@ -1864,10 +1864,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   // DC
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LexicalOffset
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // VisibleOffset
-  DeclRecordAbbrev = Stream.EmitAbbrev(Abv);
+  DeclRecordAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_PARM_VAR
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_PARM_VAR));
   // Redeclarable
   Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration
@@ -1911,10 +1911,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc
-  DeclParmVarAbbrev = Stream.EmitAbbrev(Abv);
+  DeclParmVarAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_TYPEDEF
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_TYPEDEF));
   // Redeclarable
   Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration
@@ -1940,10 +1940,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   // TypedefDecl
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc
-  DeclTypedefAbbrev = Stream.EmitAbbrev(Abv);
+  DeclTypedefAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_VAR
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_VAR));
   // Redeclarable
   Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration
@@ -1989,10 +1989,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc
-  DeclVarAbbrev = Stream.EmitAbbrev(Abv);
+  DeclVarAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for DECL_CXX_METHOD
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_CXX_METHOD));
   // RedeclarableDecl
   Abv->Add(BitCodeAbbrevOp(0)); // CanonicalDecl
@@ -2047,10 +2047,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   //       Add an AbbrevOp for 'size then elements' and use it here.
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
-  DeclCXXMethodAbbrev = Stream.EmitAbbrev(Abv);
+  DeclCXXMethodAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for EXPR_DECL_REF
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::EXPR_DECL_REF));
   //Stmt
   //Expr
@@ -2070,10 +2070,10 @@ void ASTWriter::WriteDeclAbbrevs() {
                            1)); // RefersToEnclosingVariableOrCapture
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclRef
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location
-  DeclRefExprAbbrev = Stream.EmitAbbrev(Abv);
+  DeclRefExprAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for EXPR_INTEGER_LITERAL
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::EXPR_INTEGER_LITERAL));
   //Stmt
   //Expr
@@ -2088,10 +2088,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location
   Abv->Add(BitCodeAbbrevOp(32)); // Bit Width
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Value
-  IntegerLiteralAbbrev = Stream.EmitAbbrev(Abv);
+  IntegerLiteralAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for EXPR_CHARACTER_LITERAL
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CHARACTER_LITERAL));
   //Stmt
   //Expr
@@ -2106,10 +2106,10 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind
-  CharacterLiteralAbbrev = Stream.EmitAbbrev(Abv);
+  CharacterLiteralAbbrev = Stream.EmitAbbrev(std::move(Abv));

   // Abbreviation for EXPR_IMPLICIT_CAST
-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::EXPR_IMPLICIT_CAST));
   // Stmt
   // Expr
@@ -2124,17 +2124,17 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(0)); // PathSize
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 6)); // CastKind
   // ImplicitCastExpr
-  ExprImplicitCastAbbrev = Stream.EmitAbbrev(Abv);
+  ExprImplicitCastAbbrev = Stream.EmitAbbrev(std::move(Abv));

-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_CONTEXT_LEXICAL));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  DeclContextLexicalAbbrev = Stream.EmitAbbrev(Abv);
+  DeclContextLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv));

-  Abv = new BitCodeAbbrev();
+  Abv = std::make_shared<BitCodeAbbrev>();
   Abv->Add(BitCodeAbbrevOp(serialization::DECL_CONTEXT_VISIBLE));
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  DeclContextVisibleLookupAbbrev = Stream.EmitAbbrev(Abv);
+  DeclContextVisibleLookupAbbrev = Stream.EmitAbbrev(std::move(Abv));
 }

 /// isRequiredDecl - Check if this is a "required" Decl, which must be seen by
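Every one of the writer hunks above is the same mechanical migration: abbreviations are now built in a shared_ptr and ownership is transferred into the bitstream, instead of handing EmitAbbrev a raw owning pointer. The pattern, isolated into a small helper (a sketch against the LLVM API as of this revision, where BitCodeAbbrev lives in llvm/Bitcode/BitCodes.h):

#include "llvm/Bitcode/BitCodes.h"
#include "llvm/Bitcode/BitstreamWriter.h"

// Build a record-code + blob abbreviation and hand it to the stream; after the
// std::move the local no longer owns the abbreviation.
static unsigned emitBlobAbbrev(llvm::BitstreamWriter &Stream, unsigned RecCode) {
  auto Abbrev = std::make_shared<llvm::BitCodeAbbrev>();
  Abbrev->Add(llvm::BitCodeAbbrevOp(RecCode));
  Abbrev->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
  return Stream.EmitAbbrev(std::move(Abbrev)); // was: EmitAbbrev(new BitCodeAbbrev())
}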
diff --git a/lib/Serialization/GeneratePCH.cpp b/lib/Serialization/GeneratePCH.cpp
index e1765dafd96f..7f1b75055b45 100644
--- a/lib/Serialization/GeneratePCH.cpp
+++ b/lib/Serialization/GeneratePCH.cpp
@@ -24,7 +24,7 @@ using namespace clang;
 PCHGenerator::PCHGenerator(
     const Preprocessor &PP, StringRef OutputFile, StringRef isysroot,
     std::shared_ptr<PCHBuffer> Buffer,
-    ArrayRef<llvm::IntrusiveRefCntPtr<ModuleFileExtension>> Extensions,
+    ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
     bool AllowASTWithErrors, bool IncludeTimestamps)
     : PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()),
       SemaPtr(nullptr), Buffer(Buffer), Stream(Buffer->Data),
diff --git a/lib/Serialization/GlobalModuleIndex.cpp b/lib/Serialization/GlobalModuleIndex.cpp
index 9f986d54a989..ae5796ede126 100644
--- a/lib/Serialization/GlobalModuleIndex.cpp
+++ b/lib/Serialization/GlobalModuleIndex.cpp
@@ -744,11 +744,11 @@ void GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
   }

   // Create a blob abbreviation
-  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   // Write the identifier table
   uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
diff --git a/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp b/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp
index a37ebc506d04..109897be2931 100644
--- a/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp
@@ -49,10 +49,10 @@ class DynamicTypeChecker : public Checker<check::PostStmt<ImplicitCastExpr>> {
       ID.AddPointer(Reg);
     }

-    PathDiagnosticPiece *VisitNode(const ExplodedNode *N,
-                                   const ExplodedNode *PrevN,
-                                   BugReporterContext &BRC,
-                                   BugReport &BR) override;
+    std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                   const ExplodedNode *PrevN,
+                                                   BugReporterContext &BRC,
+                                                   BugReport &BR) override;

   private:
     // The tracked region.
@@ -91,9 +91,11 @@ void DynamicTypeChecker::reportTypeError(QualType DynamicType,
   C.emitReport(std::move(R));
 }

-PathDiagnosticPiece *DynamicTypeChecker::DynamicTypeBugVisitor::VisitNode(
-    const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC,
-    BugReport &BR) {
+std::shared_ptr<PathDiagnosticPiece>
+DynamicTypeChecker::DynamicTypeBugVisitor::VisitNode(const ExplodedNode *N,
+                                                     const ExplodedNode *PrevN,
+                                                     BugReporterContext &BRC,
+                                                     BugReport &BR) {
   ProgramStateRef State = N->getState();
   ProgramStateRef StatePrev = PrevN->getState();
@@ -143,7 +145,8 @@ PathDiagnosticPiece *DynamicTypeChecker::DynamicTypeBugVisitor::VisitNode(
   // Generate the extra diagnostic.
   PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
                              N->getLocationContext());
-  return new PathDiagnosticEventPiece(Pos, OS.str(), true, nullptr);
+  return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true,
+                                                    nullptr);
 }

 static bool hasDefinition(const ObjCObjectPointerType *ObjPtr) {
diff --git a/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index a418c82f5a01..0891ea85a714 100644
--- a/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -83,10 +83,10 @@ class DynamicTypePropagation:
       ID.AddPointer(Sym);
     }

-    PathDiagnosticPiece *VisitNode(const ExplodedNode *N,
-                                   const ExplodedNode *PrevN,
-                                   BugReporterContext &BRC,
-                                   BugReport &BR) override;
+    std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
+                                                   const ExplodedNode *PrevN,
+                                                   BugReporterContext &BRC,
+                                                   BugReport &BR) override;

   private:
     // The tracked symbol.
@@ -923,9 +923,11 @@ void DynamicTypePropagation::reportGenericsBug(
   C.emitReport(std::move(R));
 }

-PathDiagnosticPiece *DynamicTypePropagation::GenericsBugVisitor::VisitNode(
-    const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC,
-    BugReport &BR) {
+std::shared_ptr<PathDiagnosticPiece>
+DynamicTypePropagation::GenericsBugVisitor::VisitNode(const ExplodedNode *N,
+                                                      const ExplodedNode *PrevN,
+                                                      BugReporterContext &BRC,
+                                                      BugReport &BR) {
   ProgramStateRef state = N->getState();
   ProgramStateRef statePrev = PrevN->getState();
@@ -975,7 +977,8 @@ PathDiagnosticPiece *DynamicTypePropagation::GenericsBugVisitor::VisitNode(
   // Generate the extra diagnostic.
   PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
                              N->getLocationContext());
-  return new PathDiagnosticEventPiece(Pos, OS.str(), true, nullptr);
+  return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true,
+                                                    nullptr);
 }

 /// Register checkers.
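All of the analyzer hunks in this patch migrate BugReporterVisitor::VisitNode from returning a raw owning PathDiagnosticPiece* to shared ownership. A sketch of the updated contract in a hypothetical checker (MyVisitor is not from the patch):

std::shared_ptr<PathDiagnosticPiece>
MyVisitor::VisitNode(const ExplodedNode *N, const ExplodedNode *PrevN,
                     BugReporterContext &BRC, BugReport &BR) {
  const Stmt *S = PathDiagnosticLocation::getStmt(N);
  if (!S)
    return nullptr; // returning an empty shared_ptr is still permitted
  PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
                             N->getLocationContext());
  return std::make_shared<PathDiagnosticEventPiece>(Pos, "tracked value used here");
}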
diff --git a/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp index d1dab6d27d45..af35c2b0e991 100644 --- a/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp @@ -123,10 +123,10 @@ public: assert(NonLocalizedString); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, + BugReport &BR) override; void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(NonLocalizedString); @@ -910,7 +910,7 @@ void NonLocalizedStringChecker::checkPostStmt(const ObjCStringLiteral *SL, setNonLocalizedState(sv, C); } -PathDiagnosticPiece * +std::shared_ptr NonLocalizedStringBRVisitor::VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred, BugReporterContext &BRC, BugReport &BR) { @@ -938,11 +938,11 @@ NonLocalizedStringBRVisitor::VisitNode(const ExplodedNode *Succ, if (!L.isValid() || !L.asLocation().isValid()) return nullptr; - auto *Piece = new PathDiagnosticEventPiece(L, - "Non-localized string literal here"); + auto Piece = std::make_shared( + L, "Non-localized string literal here"); Piece->addRange(LiteralExpr->getSourceRange()); - return Piece; + return std::move(Piece); } namespace { diff --git a/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp b/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp index d56ea6d689d3..e9ec7a0c4365 100644 --- a/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp +++ b/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp @@ -85,9 +85,11 @@ void MPIBugReporter::reportUnmatchedWait( BReporter.emitReport(std::move(Report)); } -PathDiagnosticPiece *MPIBugReporter::RequestNodeVisitor::VisitNode( - const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +MPIBugReporter::RequestNodeVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { if (IsNodeFound) return nullptr; @@ -104,7 +106,7 @@ PathDiagnosticPiece *MPIBugReporter::RequestNodeVisitor::VisitNode( PathDiagnosticLocation L = PathDiagnosticLocation::create(P, BRC.getSourceManager()); - return new PathDiagnosticEventPiece(L, ErrorText); + return std::make_shared(L, ErrorText); } return nullptr; diff --git a/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h b/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h index 8474d2d194e8..0ee91cca4793 100644 --- a/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h +++ b/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h @@ -90,10 +90,10 @@ private: ID.AddPointer(RequestRegion); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; private: const MemRegion *const RequestRegion; diff --git a/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp index 86c827045e9a..f1aa16391db1 100644 --- a/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp @@ -143,10 +143,10 @@ private: ID.AddPointer(Sym); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const 
ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; }; }; } @@ -583,12 +583,10 @@ void MacOSKeychainAPIChecker::checkDeadSymbols(SymbolReaper &SR, C.addTransition(State, N); } - -PathDiagnosticPiece *MacOSKeychainAPIChecker::SecKeychainBugVisitor::VisitNode( - const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +MacOSKeychainAPIChecker::SecKeychainBugVisitor::VisitNode( + const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC, + BugReport &BR) { const AllocationState *AS = N->getState()->get(Sym); if (!AS) return nullptr; @@ -610,7 +608,8 @@ PathDiagnosticPiece *MacOSKeychainAPIChecker::SecKeychainBugVisitor::VisitNode( const Expr *ArgExpr = CE->getArg(FunctionsToTrack[Idx].Param); PathDiagnosticLocation Pos(ArgExpr, BRC.getSourceManager(), N->getLocationContext()); - return new PathDiagnosticEventPiece(Pos, "Data is allocated here."); + return std::make_shared(Pos, + "Data is allocated here."); } void ento::registerMacOSKeychainAPIChecker(CheckerManager &mgr) { diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index f7c4ea10c438..8e839a1d28fd 100644 --- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -463,10 +463,10 @@ private: SPrev->isAllocatedOfSizeZero()))); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; std::unique_ptr getEndPath(BugReporterContext &BRC, const ExplodedNode *EndPathNode, @@ -2668,11 +2668,9 @@ static SymbolRef findFailedReallocSymbol(ProgramStateRef currState, return nullptr; } -PathDiagnosticPiece * -MallocChecker::MallocBugVisitor::VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr MallocChecker::MallocBugVisitor::VisitNode( + const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC, + BugReport &BR) { ProgramStateRef state = N->getState(); ProgramStateRef statePrev = PrevN->getState(); @@ -2740,7 +2738,7 @@ MallocChecker::MallocBugVisitor::VisitNode(const ExplodedNode *N, // Generate the extra diagnostic. PathDiagnosticLocation Pos(S, BRC.getSourceManager(), N->getLocationContext()); - return new PathDiagnosticEventPiece(Pos, Msg, true, StackHint); + return std::make_shared(Pos, Msg, true, StackHint); } void MallocChecker::printState(raw_ostream &Out, ProgramStateRef State, diff --git a/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index d96017a1f532..c14a87c9d2a4 100644 --- a/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -153,10 +153,10 @@ private: ID.AddPointer(Region); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; private: // The tracked region. 
@@ -306,9 +306,11 @@ NullabilityChecker::getTrackRegion(SVal Val, bool CheckSuperRegion) const { return dyn_cast(Region); } -PathDiagnosticPiece *NullabilityChecker::NullabilityBugVisitor::VisitNode( - const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +NullabilityChecker::NullabilityBugVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) { ProgramStateRef State = N->getState(); ProgramStateRef StatePrev = PrevN->getState(); @@ -339,7 +341,8 @@ PathDiagnosticPiece *NullabilityChecker::NullabilityBugVisitor::VisitNode( // Generate the extra diagnostic. PathDiagnosticLocation Pos(S, BRC.getSourceManager(), N->getLocationContext()); - return new PathDiagnosticEventPiece(Pos, InfoText, true, nullptr); + return std::make_shared(Pos, InfoText, true, + nullptr); } static Nullability getNullabilityAnnotation(QualType Type) { diff --git a/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp index e75d20897710..075ff09dcbfa 100644 --- a/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp @@ -73,10 +73,10 @@ public: : ReceiverSymbol(ReceiverSymbol), Satisfied(false) {} - PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, + BugReport &BR) override; void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(ReceiverSymbol); @@ -249,10 +249,10 @@ ObjCSuperDeallocChecker::isSuperDeallocMessage(const ObjCMethodCall &M) const { return M.getSelector() == SELdealloc; } -PathDiagnosticPiece *SuperDeallocBRVisitor::VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +SuperDeallocBRVisitor::VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, BugReport &BR) { if (Satisfied) return nullptr; @@ -275,7 +275,7 @@ PathDiagnosticPiece *SuperDeallocBRVisitor::VisitNode(const ExplodedNode *Succ, if (!L.isValid() || !L.asLocation().isValid()) return nullptr; - return new PathDiagnosticEventPiece( + return std::make_shared( L, "[super dealloc] called here"); } diff --git a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp index 204b0a6c468b..eb101e12af25 100644 --- a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp @@ -1773,10 +1773,10 @@ namespace { ID.AddPointer(Sym); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; std::unique_ptr getEndPath(BugReporterContext &BRC, const ExplodedNode *N, @@ -1899,10 +1899,9 @@ static bool isSynthesizedAccessor(const StackFrameContext *SFC) { return SFC->getAnalysisDeclContext()->isBodyAutosynthesized(); } -PathDiagnosticPiece *CFRefReportVisitor::VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +CFRefReportVisitor::VisitNode(const ExplodedNode *N, const ExplodedNode *PrevN, + BugReporterContext &BRC, BugReport &BR) { // 
FIXME: We will eventually need to handle non-statement-based events // (__attribute__((cleanup))). if (!N->getLocation().getAs()) @@ -2026,7 +2025,7 @@ PathDiagnosticPiece *CFRefReportVisitor::VisitNode(const ExplodedNode *N, PathDiagnosticLocation Pos(S, BRC.getSourceManager(), N->getLocationContext()); - return new PathDiagnosticEventPiece(Pos, os.str()); + return std::make_shared(Pos, os.str()); } // Gather up the effects that were performed on the object at this @@ -2203,7 +2202,7 @@ PathDiagnosticPiece *CFRefReportVisitor::VisitNode(const ExplodedNode *N, const Stmt *S = N->getLocation().castAs().getStmt(); PathDiagnosticLocation Pos(S, BRC.getSourceManager(), N->getLocationContext()); - PathDiagnosticPiece *P = new PathDiagnosticEventPiece(Pos, os.str()); + auto P = std::make_shared(Pos, os.str()); // Add the range by scanning the children of the statement for any bindings // to Sym. @@ -2214,7 +2213,7 @@ PathDiagnosticPiece *CFRefReportVisitor::VisitNode(const ExplodedNode *N, break; } - return P; + return std::move(P); } namespace { diff --git a/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp b/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp index b794d2f86bbe..5268bbf5562e 100644 --- a/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp @@ -70,10 +70,10 @@ public: ID.Add(SFC); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, + BugReport &BR) override; }; class TestAfterDivZeroChecker @@ -94,10 +94,9 @@ public: REGISTER_SET_WITH_PROGRAMSTATE(DivZeroMap, ZeroState) -PathDiagnosticPiece *DivisionBRVisitor::VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +DivisionBRVisitor::VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred, + BugReporterContext &BRC, BugReport &BR) { if (Satisfied) return nullptr; @@ -128,7 +127,7 @@ PathDiagnosticPiece *DivisionBRVisitor::VisitNode(const ExplodedNode *Succ, if (!L.isValid() || !L.asLocation().isValid()) return nullptr; - return new PathDiagnosticEventPiece( + return std::make_shared( L, "Division with compared value made here"); } diff --git a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp index b4bfa0c03341..0b7a4865ddc2 100644 --- a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp @@ -91,10 +91,10 @@ private: return llvm::make_unique(L, BR.getDescription(), false); } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override; + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override; private: const MemRegion *Reg; @@ -335,7 +335,7 @@ void ValistChecker::checkVAListEndCall(const CallEvent &Call, C.addTransition(State); } -PathDiagnosticPiece *ValistChecker::ValistBugVisitor::VisitNode( +std::shared_ptr ValistChecker::ValistBugVisitor::VisitNode( const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) { ProgramStateRef State = N->getState(); @@ -358,7 +358,7 @@ PathDiagnosticPiece *ValistChecker::ValistBugVisitor::VisitNode( PathDiagnosticLocation Pos(S, BRC.getSourceManager(), N->getLocationContext()); 
- return new PathDiagnosticEventPiece(Pos, Msg, true); + return std::make_shared(Pos, Msg, true); } #define REGISTER_CHECKER(name) \ diff --git a/lib/StaticAnalyzer/Core/BugReporter.cpp b/lib/StaticAnalyzer/Core/BugReporter.cpp index 53b4e699f7ad..2114033ba8b5 100644 --- a/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -111,15 +111,15 @@ static void removeRedundantMsgs(PathPieces &path) { // grabbing the front, processing it, and if we decide to keep it append // it to the end of the path. The entire path is processed in this way. for (unsigned i = 0; i < N; ++i) { - IntrusiveRefCntPtr piece(path.front()); + auto piece = std::move(path.front()); path.pop_front(); switch (piece->getKind()) { case PathDiagnosticPiece::Call: - removeRedundantMsgs(cast(piece)->path); + removeRedundantMsgs(cast(*piece).path); break; case PathDiagnosticPiece::Macro: - removeRedundantMsgs(cast(piece)->subPieces); + removeRedundantMsgs(cast(*piece).subPieces); break; case PathDiagnosticPiece::ControlFlow: break; @@ -130,13 +130,13 @@ static void removeRedundantMsgs(PathPieces &path) { if (PathDiagnosticEventPiece *nextEvent = dyn_cast(path.front().get())) { PathDiagnosticEventPiece *event = - cast(piece); + cast(piece.get()); // Check to see if we should keep one of the two pieces. If we // come up with a preference, record which piece to keep, and consume // another piece from the path. - if (PathDiagnosticEventPiece *pieceToKeep = - eventsDescribeSameCondition(event, nextEvent)) { - piece = pieceToKeep; + if (auto *pieceToKeep = + eventsDescribeSameCondition(event, nextEvent)) { + piece = std::move(pieceToKeep == event ? piece : path.front()); path.pop_front(); ++i; } @@ -146,7 +146,7 @@ static void removeRedundantMsgs(PathPieces &path) { case PathDiagnosticPiece::Note: break; } - path.push_back(piece); + path.push_back(std::move(piece)); } } @@ -166,38 +166,38 @@ static bool removeUnneededCalls(PathPieces &pieces, BugReport *R, for (unsigned i = 0 ; i < N ; ++i) { // Remove the front piece from the path. If it is still something we // want to keep once we are done, we will push it back on the end. - IntrusiveRefCntPtr piece(pieces.front()); + auto piece = std::move(pieces.front()); pieces.pop_front(); switch (piece->getKind()) { case PathDiagnosticPiece::Call: { - PathDiagnosticCallPiece *call = cast(piece); + auto &call = cast(*piece); // Check if the location context is interesting. - assert(LCM.count(&call->path)); - if (R->isInteresting(LCM[&call->path])) { + assert(LCM.count(&call.path)); + if (R->isInteresting(LCM[&call.path])) { containsSomethingInteresting = true; break; } - if (!removeUnneededCalls(call->path, R, LCM)) + if (!removeUnneededCalls(call.path, R, LCM)) continue; containsSomethingInteresting = true; break; } case PathDiagnosticPiece::Macro: { - PathDiagnosticMacroPiece *macro = cast(piece); - if (!removeUnneededCalls(macro->subPieces, R, LCM)) + auto ¯o = cast(*piece); + if (!removeUnneededCalls(macro.subPieces, R, LCM)) continue; containsSomethingInteresting = true; break; } case PathDiagnosticPiece::Event: { - PathDiagnosticEventPiece *event = cast(piece); + auto &event = cast(*piece); // We never throw away an event, but we do throw it away wholesale // as part of a path if we throw the entire path away. 
- containsSomethingInteresting |= !event->isPrunable(); + containsSomethingInteresting |= !event.isPrunable(); break; } case PathDiagnosticPiece::ControlFlow: @@ -207,7 +207,7 @@ static bool removeUnneededCalls(PathPieces &pieces, BugReport *R, break; } - pieces.push_back(piece); + pieces.push_back(std::move(piece)); } return containsSomethingInteresting; @@ -226,7 +226,7 @@ static void adjustCallLocations(PathPieces &Pieces, PathDiagnosticLocation *LastCallLocation = nullptr) { for (PathPieces::iterator I = Pieces.begin(), E = Pieces.end(); I != E; ++I) { - PathDiagnosticCallPiece *Call = dyn_cast(*I); + PathDiagnosticCallPiece *Call = dyn_cast(I->get()); if (!Call) { assert((*I)->getLocation().asLocation().isValid()); @@ -260,14 +260,13 @@ adjustCallLocations(PathPieces &Pieces, /// explicitly in a constructor or braced list. static void removeEdgesToDefaultInitializers(PathPieces &Pieces) { for (PathPieces::iterator I = Pieces.begin(), E = Pieces.end(); I != E;) { - if (PathDiagnosticCallPiece *C = dyn_cast(*I)) + if (auto *C = dyn_cast(I->get())) removeEdgesToDefaultInitializers(C->path); - if (PathDiagnosticMacroPiece *M = dyn_cast(*I)) + if (auto *M = dyn_cast(I->get())) removeEdgesToDefaultInitializers(M->subPieces); - if (PathDiagnosticControlFlowPiece *CF = - dyn_cast(*I)) { + if (auto *CF = dyn_cast(I->get())) { const Stmt *Start = CF->getStartLocation().asStmt(); const Stmt *End = CF->getEndLocation().asStmt(); if (Start && isa(Start)) { @@ -276,8 +275,8 @@ static void removeEdgesToDefaultInitializers(PathPieces &Pieces) { } else if (End && isa(End)) { PathPieces::iterator Next = std::next(I); if (Next != E) { - if (PathDiagnosticControlFlowPiece *NextCF = - dyn_cast(*Next)) { + if (auto *NextCF = + dyn_cast(Next->get())) { NextCF->setStartLocation(CF->getStartLocation()); } } @@ -295,10 +294,10 @@ static void removeEdgesToDefaultInitializers(PathPieces &Pieces) { /// Farm generated functions. static void removePiecesWithInvalidLocations(PathPieces &Pieces) { for (PathPieces::iterator I = Pieces.begin(), E = Pieces.end(); I != E;) { - if (PathDiagnosticCallPiece *C = dyn_cast(*I)) + if (auto *C = dyn_cast(I->get())) removePiecesWithInvalidLocations(C->path); - if (PathDiagnosticMacroPiece *M = dyn_cast(*I)) + if (auto *M = dyn_cast(I->get())) removePiecesWithInvalidLocations(M->subPieces); if (!(*I)->getLocation().isValid() || @@ -518,11 +517,9 @@ static bool GenerateVisitorsOnlyPathDiagnostic( BugReport *R = PDB.getBugReport(); while (const ExplodedNode *Pred = N->getFirstPred()) { - for (auto &V : visitors) { + for (auto &V : visitors) // Visit all the node pairs, but throw the path pieces away. - PathDiagnosticPiece *Piece = V->VisitNode(N, Pred, PDB, *R); - delete Piece; - } + V->VisitNode(N, Pred, PDB, *R); N = Pred; } @@ -536,12 +533,11 @@ static bool GenerateVisitorsOnlyPathDiagnostic( typedef std::pair StackDiagPair; typedef SmallVector StackDiagVector; -static void updateStackPiecesWithMessage(PathDiagnosticPiece *P, +static void updateStackPiecesWithMessage(PathDiagnosticPiece &P, StackDiagVector &CallStack) { // If the piece contains a special message, add it to all the call // pieces on the active stack. 
- if (PathDiagnosticEventPiece *ep = - dyn_cast(P)) { + if (PathDiagnosticEventPiece *ep = dyn_cast(&P)) { if (ep->hasCallStackHint()) for (StackDiagVector::iterator I = CallStack.begin(), @@ -582,13 +578,13 @@ static bool GenerateMinimalPathDiagnostic( do { if (Optional CE = P.getAs()) { - PathDiagnosticCallPiece *C = - PathDiagnosticCallPiece::construct(N, *CE, SMgr); + auto C = PathDiagnosticCallPiece::construct(N, *CE, SMgr); // Record the mapping from call piece to LocationContext. LCM[&C->path] = CE->getCalleeContext(); - PD.getActivePath().push_front(C); - PD.pushActivePath(&C->path); - CallStack.push_back(StackDiagPair(C, N)); + auto *P = C.get(); + PD.getActivePath().push_front(std::move(C)); + PD.pushActivePath(&P->path); + CallStack.push_back(StackDiagPair(P, N)); break; } @@ -604,7 +600,7 @@ static bool GenerateMinimalPathDiagnostic( // a new PathDiagnosticCallPiece. PathDiagnosticCallPiece *C; if (VisitedEntireCall) { - C = cast(PD.getActivePath().front()); + C = cast(PD.getActivePath().front().get()); } else { const Decl *Caller = CE->getLocationContext()->getDecl(); C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller); @@ -649,8 +645,9 @@ static bool GenerateMinimalPathDiagnostic( os << "Control jumps to line " << End.asLocation().getExpansionLineNumber(); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); break; } @@ -701,14 +698,16 @@ static bool GenerateMinimalPathDiagnostic( break; } } - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } else { os << "'Default' branch taken. "; const PathDiagnosticLocation &End = PDB.ExecutionContinues(os, N); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } break; @@ -719,8 +718,9 @@ static bool GenerateMinimalPathDiagnostic( std::string sbuf; llvm::raw_string_ostream os(sbuf); PathDiagnosticLocation End = PDB.ExecutionContinues(os, N); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); break; } @@ -741,8 +741,9 @@ static bool GenerateMinimalPathDiagnostic( if (const Stmt *S = End.asStmt()) End = PDB.getEnclosingStmtLocation(S); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); break; } @@ -764,15 +765,17 @@ static bool GenerateMinimalPathDiagnostic( PathDiagnosticLocation End(B->getLHS(), SMgr, LC); PathDiagnosticLocation Start = PathDiagnosticLocation::createOperatorLoc(B, SMgr); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } else { os << "true"; PathDiagnosticLocation Start(B->getLHS(), SMgr, LC); PathDiagnosticLocation End = PDB.ExecutionContinues(N); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } } else { @@ -783,16 +786,18 @@ static bool GenerateMinimalPathDiagnostic( os << "false"; PathDiagnosticLocation Start(B->getLHS(), SMgr, LC); PathDiagnosticLocation End = 
PDB.ExecutionContinues(N); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } else { os << "true"; PathDiagnosticLocation End(B->getLHS(), SMgr, LC); PathDiagnosticLocation Start = PathDiagnosticLocation::createOperatorLoc(B, SMgr); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } } @@ -810,8 +815,9 @@ static bool GenerateMinimalPathDiagnostic( if (const Stmt *S = End.asStmt()) End = PDB.getEnclosingStmtLocation(S); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } else { PathDiagnosticLocation End = PDB.ExecutionContinues(N); @@ -819,8 +825,9 @@ static bool GenerateMinimalPathDiagnostic( if (const Stmt *S = End.asStmt()) End = PDB.getEnclosingStmtLocation(S); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, "Loop condition is false. Exiting loop")); + PD.getActivePath().push_front( + std::make_shared( + Start, End, "Loop condition is false. Exiting loop")); } break; @@ -837,16 +844,18 @@ static bool GenerateMinimalPathDiagnostic( if (const Stmt *S = End.asStmt()) End = PDB.getEnclosingStmtLocation(S); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, os.str())); + PD.getActivePath().push_front( + std::make_shared(Start, End, + os.str())); } else { PathDiagnosticLocation End = PDB.ExecutionContinues(N); if (const Stmt *S = End.asStmt()) End = PDB.getEnclosingStmtLocation(S); - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, "Loop condition is true. Entering loop body")); + PD.getActivePath().push_front( + std::make_shared( + Start, End, "Loop condition is true. Entering loop body")); } break; @@ -859,11 +868,13 @@ static bool GenerateMinimalPathDiagnostic( End = PDB.getEnclosingStmtLocation(S); if (*(Src->succ_begin()+1) == Dst) - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, "Taking false branch")); + PD.getActivePath().push_front( + std::make_shared( + Start, End, "Taking false branch")); else - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece( - Start, End, "Taking true branch")); + PD.getActivePath().push_front( + std::make_shared( + Start, End, "Taking true branch")); break; } @@ -875,9 +886,9 @@ static bool GenerateMinimalPathDiagnostic( // Add diagnostic pieces from custom visitors. 
BugReport *R = PDB.getBugReport(); for (auto &V : visitors) { - if (PathDiagnosticPiece *p = V->VisitNode(N, NextNode, PDB, *R)) { - PD.getActivePath().push_front(p); - updateStackPiecesWithMessage(p, CallStack); + if (auto p = V->VisitNode(N, NextNode, PDB, *R)) { + updateStackPiecesWithMessage(*p, CallStack); + PD.getActivePath().push_front(std::move(p)); } } } @@ -1118,7 +1129,9 @@ void EdgeBuilder::rawAddEdge(PathDiagnosticLocation NewLoc) { PrevLocClean.asLocation().getExpansionLoc()) return; - PD.getActivePath().push_front(new PathDiagnosticControlFlowPiece(NewLocClean, PrevLocClean)); + PD.getActivePath().push_front( + std::make_shared(NewLocClean, + PrevLocClean)); PrevLoc = NewLoc; } @@ -1423,16 +1436,16 @@ static bool GenerateExtensivePathDiagnostic( N->getLocationContext()); } - PathDiagnosticCallPiece *C = - PathDiagnosticCallPiece::construct(N, *CE, SM); + auto C = PathDiagnosticCallPiece::construct(N, *CE, SM); LCM[&C->path] = CE->getCalleeContext(); EB.addEdge(C->callReturn, /*AlwaysAdd=*/true, /*IsPostJump=*/true); EB.flushLocations(); - PD.getActivePath().push_front(C); - PD.pushActivePath(&C->path); - CallStack.push_back(StackDiagPair(C, N)); + auto *P = C.get(); + PD.getActivePath().push_front(std::move(C)); + PD.pushActivePath(&P->path); + CallStack.push_back(StackDiagPair(P, N)); break; } @@ -1458,7 +1471,7 @@ static bool GenerateExtensivePathDiagnostic( // a new PathDiagnosticCallPiece. PathDiagnosticCallPiece *C; if (VisitedEntireCall) { - C = cast(PD.getActivePath().front()); + C = cast(PD.getActivePath().front().get()); } else { const Decl *Caller = CE->getLocationContext()->getDecl(); C = PathDiagnosticCallPiece::construct(PD.getActivePath(), Caller); @@ -1505,13 +1518,12 @@ static bool GenerateExtensivePathDiagnostic( else if (const WhileStmt *WS = dyn_cast(Loop)) CS = dyn_cast(WS->getBody()); - PathDiagnosticEventPiece *p = - new PathDiagnosticEventPiece(L, - "Looping back to the head of the loop"); + auto p = std::make_shared( + L, "Looping back to the head of the loop"); p->setPrunable(true); EB.addEdge(p->getLocation(), true); - PD.getActivePath().push_front(p); + PD.getActivePath().push_front(std::move(p)); if (CS) { PathDiagnosticLocation BL = @@ -1533,12 +1545,12 @@ static bool GenerateExtensivePathDiagnostic( N), Term)) { PathDiagnosticLocation L(Term, SM, PDB.LC); - PathDiagnosticEventPiece *PE = - new PathDiagnosticEventPiece(L, "Loop body executed 0 times"); + auto PE = std::make_shared( + L, "Loop body executed 0 times"); PE->setPrunable(true); EB.addEdge(PE->getLocation(), true); - PD.getActivePath().push_front(PE); + PD.getActivePath().push_front(std::move(PE)); } // In any case, add the terminator as the current statement @@ -1573,11 +1585,11 @@ static bool GenerateExtensivePathDiagnostic( // Add pieces from custom visitors. 
BugReport *R = PDB.getBugReport(); for (auto &V : visitors) { - if (PathDiagnosticPiece *p = V->VisitNode(N, NextNode, PDB, *R)) { + if (auto p = V->VisitNode(N, NextNode, PDB, *R)) { const PathDiagnosticLocation &Loc = p->getLocation(); EB.addEdge(Loc, true); - PD.getActivePath().push_front(p); - updateStackPiecesWithMessage(p, CallStack); + updateStackPiecesWithMessage(*p, CallStack); + PD.getActivePath().push_front(std::move(p)); if (const Stmt *S = Loc.asStmt()) EB.addExtendedContext(PDB.getEnclosingStmtLocation(S).asStmt()); @@ -1610,8 +1622,8 @@ static void addEdgeToPath(PathPieces &path, if (NewLoc.asStmt() && NewLoc.asStmt() == PrevLoc.asStmt()) return; - path.push_front(new PathDiagnosticControlFlowPiece(NewLoc, - PrevLoc)); + path.push_front( + std::make_shared(NewLoc, PrevLoc)); PrevLoc = NewLoc; } @@ -1678,7 +1690,7 @@ static bool GenerateAlternateExtensivePathDiagnostic( // Since we just transferred the path over to the call piece, // reset the mapping from active to location context. assert(PD.getActivePath().size() == 1 && - PD.getActivePath().front() == C); + PD.getActivePath().front().get() == C); LCM[&PD.getActivePath()] = nullptr; // Record the location context mapping for the path within @@ -1729,20 +1741,20 @@ static bool GenerateAlternateExtensivePathDiagnostic( // We are descending into a call (backwards). Construct // a new call piece to contain the path pieces for that call. - PathDiagnosticCallPiece *C = - PathDiagnosticCallPiece::construct(N, *CE, SM); + auto C = PathDiagnosticCallPiece::construct(N, *CE, SM); // Record the location context for this call piece. LCM[&C->path] = CE->getCalleeContext(); // Add the edge to the return site. addEdgeToPath(PD.getActivePath(), PrevLoc, C->callReturn, PDB.LC); - PD.getActivePath().push_front(C); + auto *P = C.get(); + PD.getActivePath().push_front(std::move(C)); PrevLoc.invalidate(); // Make the contents of the call the active path for now. - PD.pushActivePath(&C->path); - CallStack.push_back(StackDiagPair(C, N)); + PD.pushActivePath(&P->path); + CallStack.push_back(StackDiagPair(P, N)); break; } @@ -1797,13 +1809,13 @@ static bool GenerateAlternateExtensivePathDiagnostic( } // do-while statements are explicitly excluded here - PathDiagnosticEventPiece *p = - new PathDiagnosticEventPiece(L, "Looping back to the head " - "of the loop"); + auto p = std::make_shared( + L, "Looping back to the head " + "of the loop"); p->setPrunable(true); addEdgeToPath(PD.getActivePath(), PrevLoc, p->getLocation(), PDB.LC); - PD.getActivePath().push_front(p); + PD.getActivePath().push_front(std::move(p)); if (const CompoundStmt *CS = dyn_cast_or_null(Body)) { addEdgeToPath(PD.getActivePath(), PrevLoc, @@ -1841,12 +1853,11 @@ static bool GenerateAlternateExtensivePathDiagnostic( if (str) { PathDiagnosticLocation L(TermCond ? TermCond : Term, SM, PDB.LC); - PathDiagnosticEventPiece *PE = - new PathDiagnosticEventPiece(L, str); + auto PE = std::make_shared(L, str); PE->setPrunable(true); addEdgeToPath(PD.getActivePath(), PrevLoc, PE->getLocation(), PDB.LC); - PD.getActivePath().push_front(PE); + PD.getActivePath().push_front(std::move(PE)); } } else if (isa(Term) || isa(Term) || isa(Term)) { @@ -1863,10 +1874,10 @@ static bool GenerateAlternateExtensivePathDiagnostic( // Add pieces from custom visitors. 
for (auto &V : visitors) { - if (PathDiagnosticPiece *p = V->VisitNode(N, NextNode, PDB, *report)) { + if (auto p = V->VisitNode(N, NextNode, PDB, *report)) { addEdgeToPath(PD.getActivePath(), PrevLoc, p->getLocation(), PDB.LC); - PD.getActivePath().push_front(p); - updateStackPiecesWithMessage(p, CallStack); + updateStackPiecesWithMessage(*p, CallStack); + PD.getActivePath().push_front(std::move(p)); } } } @@ -1973,7 +1984,7 @@ static void addContextEdges(PathPieces &pieces, SourceManager &SM, for (PathPieces::iterator I = pieces.begin(), E = Prev; I != E; Prev = I, ++I) { PathDiagnosticControlFlowPiece *Piece = - dyn_cast(*I); + dyn_cast(I->get()); if (!Piece) continue; @@ -2014,8 +2025,7 @@ static void addContextEdges(PathPieces &pieces, SourceManager &SM, // Try to extend the previous edge if it's at the same level as the source // context. if (Prev != E) { - PathDiagnosticControlFlowPiece *PrevPiece = - dyn_cast(*Prev); + auto *PrevPiece = dyn_cast(Prev->get()); if (PrevPiece) { if (const Stmt *PrevSrc = getLocStmt(PrevPiece->getStartLocation())) { @@ -2031,8 +2041,10 @@ static void addContextEdges(PathPieces &pieces, SourceManager &SM, // Otherwise, split the current edge into a context edge and a // subexpression edge. Note that the context statement may itself have // context. - Piece = new PathDiagnosticControlFlowPiece(SrcLoc, DstContext); - I = pieces.insert(I, Piece); + auto P = + std::make_shared(SrcLoc, DstContext); + Piece = P.get(); + I = pieces.insert(I, std::move(P)); } } } @@ -2051,8 +2063,7 @@ static void addContextEdges(PathPieces &pieces, SourceManager &SM, static void simplifySimpleBranches(PathPieces &pieces) { for (PathPieces::iterator I = pieces.begin(), E = pieces.end(); I != E; ++I) { - PathDiagnosticControlFlowPiece *PieceI = - dyn_cast(*I); + auto *PieceI = dyn_cast(I->get()); if (!PieceI) continue; @@ -2073,7 +2084,7 @@ static void simplifySimpleBranches(PathPieces &pieces) { if (NextI == E) break; - PathDiagnosticEventPiece *EV = dyn_cast(*NextI); + auto *EV = dyn_cast(NextI->get()); if (EV) { StringRef S = EV->getString(); if (S == StrEnteringLoop || S == StrLoopBodyZero || @@ -2084,7 +2095,7 @@ static void simplifySimpleBranches(PathPieces &pieces) { break; } - PieceNextI = dyn_cast(*NextI); + PieceNextI = dyn_cast(NextI->get()); break; } @@ -2176,7 +2187,7 @@ static void removeContextCycles(PathPieces &Path, SourceManager &SM, for (PathPieces::iterator I = Path.begin(), E = Path.end(); I != E; ) { // Pattern match the current piece and its successor. 
PathDiagnosticControlFlowPiece *PieceI = - dyn_cast(*I); + dyn_cast(I->get()); if (!PieceI) { ++I; @@ -2191,14 +2202,14 @@ static void removeContextCycles(PathPieces &Path, SourceManager &SM, break; PathDiagnosticControlFlowPiece *PieceNextI = - dyn_cast(*NextI); + dyn_cast(NextI->get()); if (!PieceNextI) { - if (isa(*NextI)) { + if (isa(NextI->get())) { ++NextI; if (NextI == E) break; - PieceNextI = dyn_cast(*NextI); + PieceNextI = dyn_cast(NextI->get()); } if (!PieceNextI) { @@ -2251,8 +2262,7 @@ static void removePunyEdges(PathPieces &path, erased = false; - PathDiagnosticControlFlowPiece *PieceI = - dyn_cast(*I); + auto *PieceI = dyn_cast(I->get()); if (!PieceI) continue; @@ -2299,8 +2309,7 @@ static void removePunyEdges(PathPieces &path, static void removeIdenticalEvents(PathPieces &path) { for (PathPieces::iterator I = path.begin(), E = path.end(); I != E; ++I) { - PathDiagnosticEventPiece *PieceI = - dyn_cast(*I); + auto *PieceI = dyn_cast(I->get()); if (!PieceI) continue; @@ -2309,8 +2318,7 @@ static void removeIdenticalEvents(PathPieces &path) { if (NextI == E) return; - PathDiagnosticEventPiece *PieceNextI = - dyn_cast(*NextI); + auto *PieceNextI = dyn_cast(NextI->get()); if (!PieceNextI) continue; @@ -2332,7 +2340,7 @@ static bool optimizeEdges(PathPieces &path, SourceManager &SM, for (PathPieces::iterator I = path.begin(), E = path.end(); I != E; ) { // Optimize subpaths. - if (PathDiagnosticCallPiece *CallI = dyn_cast(*I)){ + if (auto *CallI = dyn_cast(I->get())) { // Record the fact that a call has been optimized so we only do the // effort once. if (!OCS.count(CallI)) { @@ -2344,8 +2352,7 @@ static bool optimizeEdges(PathPieces &path, SourceManager &SM, } // Pattern match the current piece and its successor. - PathDiagnosticControlFlowPiece *PieceI = - dyn_cast(*I); + auto *PieceI = dyn_cast(I->get()); if (!PieceI) { ++I; @@ -2361,8 +2368,7 @@ static bool optimizeEdges(PathPieces &path, SourceManager &SM, if (NextI == E) break; - PathDiagnosticControlFlowPiece *PieceNextI = - dyn_cast(*NextI); + auto *PieceNextI = dyn_cast(NextI->get()); if (!PieceNextI) { ++I; @@ -2511,8 +2517,8 @@ static bool optimizeEdges(PathPieces &path, SourceManager &SM, static void dropFunctionEntryEdge(PathPieces &Path, LocationContextMap &LCM, SourceManager &SM) { - const PathDiagnosticControlFlowPiece *FirstEdge = - dyn_cast(Path.front()); + const auto *FirstEdge = + dyn_cast(Path.front().get()); if (!FirstEdge) return; @@ -2967,11 +2973,11 @@ bool TrimmedGraph::popNextReportGraph(ReportGraph &GraphWrapper) { /// CompactPathDiagnostic - This function postprocesses a PathDiagnostic object /// and collapses PathDiagosticPieces that are expanded by macros. static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { - typedef std::vector, - SourceLocation> > MacroStackTy; + typedef std::vector< + std::pair, SourceLocation>> + MacroStackTy; - typedef std::vector > - PiecesTy; + typedef std::vector> PiecesTy; MacroStackTy MacroStack; PiecesTy Pieces; @@ -2979,10 +2985,10 @@ static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { for (PathPieces::const_iterator I = path.begin(), E = path.end(); I!=E; ++I) { - PathDiagnosticPiece *piece = I->get(); + auto &piece = *I; // Recursively compact calls. - if (PathDiagnosticCallPiece *call=dyn_cast(piece)){ + if (auto *call = dyn_cast(&*piece)) { CompactPathDiagnostic(call->path, SM); } @@ -3011,7 +3017,7 @@ static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { // We aren't in the same group. 
Are we descending into a new macro // or are part of an old one? - IntrusiveRefCntPtr MacroGroup; + std::shared_ptr MacroGroup; SourceLocation ParentInstantiationLoc = InstantiationLoc.isMacroID() ? SM.getExpansionLoc(Loc) : @@ -3034,8 +3040,7 @@ static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { if (!MacroGroup || ParentInstantiationLoc == MacroStack.back().second) { // Create a new macro group and add it to the stack. - PathDiagnosticMacroPiece *NewGroup = - new PathDiagnosticMacroPiece( + auto NewGroup = std::make_shared( PathDiagnosticLocation::createSingleLocation(piece->getLocation())); if (MacroGroup) @@ -3477,13 +3482,12 @@ void BugReporter::FlushReport(BugReport *exampleReport, for (auto I = exampleReport->getNotes().rbegin(), E = exampleReport->getNotes().rend(); I != E; ++I) { PathDiagnosticNotePiece *Piece = I->get(); - PathDiagnosticEventPiece *ConvertedPiece = - new PathDiagnosticEventPiece(Piece->getLocation(), - Piece->getString()); + auto ConvertedPiece = std::make_shared( + Piece->getLocation(), Piece->getString()); for (const auto &R: Piece->getRanges()) ConvertedPiece->addRange(R); - Pieces.push_front(ConvertedPiece); + Pieces.push_front(std::move(ConvertedPiece)); } } else { for (auto I = exampleReport->getNotes().rbegin(), diff --git a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 7f20f0d7703e..c3c3f2ff76ec 100644 --- a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -229,10 +229,9 @@ public: return Options.shouldAvoidSuppressingNullArgumentPaths(); } - PathDiagnosticPiece *visitNodeInitial(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { + std::shared_ptr + visitNodeInitial(const ExplodedNode *N, const ExplodedNode *PrevN, + BugReporterContext &BRC, BugReport &BR) { // Only print a message at the interesting return statement. 
if (N->getLocationContext() != StackFrame) return nullptr; @@ -328,13 +327,12 @@ public: if (!L.isValid() || !L.asLocation().isValid()) return nullptr; - return new PathDiagnosticEventPiece(L, Out.str()); + return std::make_shared(L, Out.str()); } - PathDiagnosticPiece *visitNodeMaybeUnsuppress(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { + std::shared_ptr + visitNodeMaybeUnsuppress(const ExplodedNode *N, const ExplodedNode *PrevN, + BugReporterContext &BRC, BugReport &BR) { #ifndef NDEBUG ExprEngine &Eng = BRC.getBugReporter().getEngine(); AnalyzerOptions &Options = Eng.getAnalysisManager().options; @@ -384,10 +382,10 @@ public: return nullptr; } - PathDiagnosticPiece *VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) override { + std::shared_ptr VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, + BugReport &BR) override { switch (Mode) { case Initial: return visitNodeInitial(N, PrevN, BRC, BR); @@ -448,10 +446,10 @@ static bool isInitializationOfVar(const ExplodedNode *N, const VarRegion *VR) { return FrameSpace->getStackFrame() == LCtx->getCurrentStackFrame(); } -PathDiagnosticPiece *FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, - const ExplodedNode *Pred, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, + const ExplodedNode *Pred, + BugReporterContext &BRC, BugReport &BR) { if (Satisfied) return nullptr; @@ -706,7 +704,7 @@ PathDiagnosticPiece *FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, if (!L.isValid() || !L.asLocation().isValid()) return nullptr; - return new PathDiagnosticEventPiece(L, os.str()); + return std::make_shared(L, os.str()); } void TrackConstraintBRVisitor::Profile(llvm::FoldingSetNodeID &ID) const { @@ -728,11 +726,10 @@ bool TrackConstraintBRVisitor::isUnderconstrained(const ExplodedNode *N) const { return (bool)N->getState()->assume(Constraint, !Assumption); } -PathDiagnosticPiece * +std::shared_ptr TrackConstraintBRVisitor::VisitNode(const ExplodedNode *N, const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { + BugReporterContext &BRC, BugReport &BR) { if (IsSatisfied) return nullptr; @@ -775,9 +772,9 @@ TrackConstraintBRVisitor::VisitNode(const ExplodedNode *N, if (!L.isValid()) return nullptr; - PathDiagnosticEventPiece *X = new PathDiagnosticEventPiece(L, os.str()); + auto X = std::make_shared(L, os.str()); X->setTag(getTag()); - return X; + return std::move(X); } return nullptr; @@ -808,7 +805,7 @@ const char *SuppressInlineDefensiveChecksVisitor::getTag() { return "IDCVisitor"; } -PathDiagnosticPiece * +std::shared_ptr SuppressInlineDefensiveChecksVisitor::VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred, BugReporterContext &BRC, @@ -1121,10 +1118,10 @@ const Expr *NilReceiverBRVisitor::getNilReceiver(const Stmt *S, return nullptr; } -PathDiagnosticPiece *NilReceiverBRVisitor::VisitNode(const ExplodedNode *N, - const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +NilReceiverBRVisitor::VisitNode(const ExplodedNode *N, + const ExplodedNode *PrevN, + BugReporterContext &BRC, BugReport &BR) { Optional P = N->getLocationAs(); if (!P) return nullptr; @@ -1155,7 +1152,7 @@ PathDiagnosticPiece *NilReceiverBRVisitor::VisitNode(const ExplodedNode *N, // Issue a message saying that the method was skipped. 
PathDiagnosticLocation L(Receiver, BRC.getSourceManager(), N->getLocationContext()); - return new PathDiagnosticEventPiece(L, OS.str()); + return std::make_shared(L, OS.str()); } // Registers every VarDecl inside a Stmt with a last store visitor. @@ -1204,23 +1201,22 @@ const char *ConditionBRVisitor::getTag() { return "ConditionBRVisitor"; } -PathDiagnosticPiece *ConditionBRVisitor::VisitNode(const ExplodedNode *N, - const ExplodedNode *Prev, - BugReporterContext &BRC, - BugReport &BR) { - PathDiagnosticPiece *piece = VisitNodeImpl(N, Prev, BRC, BR); +std::shared_ptr +ConditionBRVisitor::VisitNode(const ExplodedNode *N, const ExplodedNode *Prev, + BugReporterContext &BRC, BugReport &BR) { + auto piece = VisitNodeImpl(N, Prev, BRC, BR); if (piece) { piece->setTag(getTag()); - if (PathDiagnosticEventPiece *ev=dyn_cast(piece)) + if (auto *ev = dyn_cast(piece.get())) ev->setPrunable(true, /* override */ false); } return piece; } -PathDiagnosticPiece *ConditionBRVisitor::VisitNodeImpl(const ExplodedNode *N, - const ExplodedNode *Prev, - BugReporterContext &BRC, - BugReport &BR) { +std::shared_ptr +ConditionBRVisitor::VisitNodeImpl(const ExplodedNode *N, + const ExplodedNode *Prev, + BugReporterContext &BRC, BugReport &BR) { ProgramPoint progPoint = N->getLocation(); ProgramStateRef CurrentState = N->getState(); @@ -1263,13 +1259,9 @@ PathDiagnosticPiece *ConditionBRVisitor::VisitNodeImpl(const ExplodedNode *N, return nullptr; } -PathDiagnosticPiece * -ConditionBRVisitor::VisitTerminator(const Stmt *Term, - const ExplodedNode *N, - const CFGBlock *srcBlk, - const CFGBlock *dstBlk, - BugReport &R, - BugReporterContext &BRC) { +std::shared_ptr ConditionBRVisitor::VisitTerminator( + const Stmt *Term, const ExplodedNode *N, const CFGBlock *srcBlk, + const CFGBlock *dstBlk, BugReport &R, BugReporterContext &BRC) { const Expr *Cond = nullptr; // In the code below, Term is a CFG terminator and Cond is a branch condition @@ -1322,11 +1314,9 @@ ConditionBRVisitor::VisitTerminator(const Stmt *Term, return VisitTrueTest(Cond, tookTrue, BRC, R, N); } -PathDiagnosticPiece * -ConditionBRVisitor::VisitTrueTest(const Expr *Cond, - bool tookTrue, - BugReporterContext &BRC, - BugReport &R, +std::shared_ptr +ConditionBRVisitor::VisitTrueTest(const Expr *Cond, bool tookTrue, + BugReporterContext &BRC, BugReport &R, const ExplodedNode *N) { // These will be modified in code below, but we need to preserve the original // values in case we want to throw the generic message. @@ -1339,13 +1329,13 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, default: break; case Stmt::BinaryOperatorClass: - if (PathDiagnosticPiece *P = VisitTrueTest( - Cond, cast(CondTmp), tookTrueTmp, BRC, R, N)) + if (auto P = VisitTrueTest(Cond, cast(CondTmp), + tookTrueTmp, BRC, R, N)) return P; break; case Stmt::DeclRefExprClass: - if (PathDiagnosticPiece *P = VisitTrueTest( - Cond, cast(CondTmp), tookTrueTmp, BRC, R, N)) + if (auto P = VisitTrueTest(Cond, cast(CondTmp), + tookTrueTmp, BRC, R, N)) return P; break; case Stmt::UnaryOperatorClass: { @@ -1368,9 +1358,8 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, if (!Loc.isValid() || !Loc.asLocation().isValid()) return nullptr; - PathDiagnosticEventPiece *Event = new PathDiagnosticEventPiece( + return std::make_shared( Loc, tookTrue ? 
GenericTrueMessage : GenericFalseMessage); - return Event; } bool ConditionBRVisitor::patternMatch(const Expr *Ex, @@ -1470,13 +1459,10 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex, return false; } -PathDiagnosticPiece * -ConditionBRVisitor::VisitTrueTest(const Expr *Cond, - const BinaryOperator *BExpr, - const bool tookTrue, - BugReporterContext &BRC, - BugReport &R, - const ExplodedNode *N) { +std::shared_ptr +ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr, + const bool tookTrue, BugReporterContext &BRC, + BugReport &R, const ExplodedNode *N) { bool shouldInvert = false; Optional shouldPrune; @@ -1549,20 +1535,15 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, Out << (shouldInvert ? LhsString : RhsString); const LocationContext *LCtx = N->getLocationContext(); PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx); - PathDiagnosticEventPiece *event = - new PathDiagnosticEventPiece(Loc, Out.str()); + auto event = std::make_shared(Loc, Out.str()); if (shouldPrune.hasValue()) event->setPrunable(shouldPrune.getValue()); return event; } -PathDiagnosticPiece * -ConditionBRVisitor::VisitConditionVariable(StringRef LhsString, - const Expr *CondVarExpr, - const bool tookTrue, - BugReporterContext &BRC, - BugReport &report, - const ExplodedNode *N) { +std::shared_ptr ConditionBRVisitor::VisitConditionVariable( + StringRef LhsString, const Expr *CondVarExpr, const bool tookTrue, + BugReporterContext &BRC, BugReport &report, const ExplodedNode *N) { // FIXME: If there's already a constraint tracker for this variable, // we shouldn't emit anything here (c.f. the double note in // test/Analysis/inlining/path-notes.c) @@ -1585,8 +1566,7 @@ ConditionBRVisitor::VisitConditionVariable(StringRef LhsString, const LocationContext *LCtx = N->getLocationContext(); PathDiagnosticLocation Loc(CondVarExpr, BRC.getSourceManager(), LCtx); - PathDiagnosticEventPiece *event = - new PathDiagnosticEventPiece(Loc, Out.str()); + auto event = std::make_shared(Loc, Out.str()); if (const DeclRefExpr *DR = dyn_cast(CondVarExpr)) { if (const VarDecl *VD = dyn_cast(DR->getDecl())) { @@ -1601,13 +1581,10 @@ ConditionBRVisitor::VisitConditionVariable(StringRef LhsString, return event; } -PathDiagnosticPiece * -ConditionBRVisitor::VisitTrueTest(const Expr *Cond, - const DeclRefExpr *DR, - const bool tookTrue, - BugReporterContext &BRC, - BugReport &report, - const ExplodedNode *N) { +std::shared_ptr +ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, + const bool tookTrue, BugReporterContext &BRC, + BugReport &report, const ExplodedNode *N) { const VarDecl *VD = dyn_cast(DR->getDecl()); if (!VD) @@ -1631,8 +1608,7 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const LocationContext *LCtx = N->getLocationContext(); PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx); - PathDiagnosticEventPiece *event = - new PathDiagnosticEventPiece(Loc, Out.str()); + auto event = std::make_shared(Loc, Out.str()); const ProgramState *state = N->getState().get(); if (const MemRegion *R = state->getLValue(VD, LCtx).getAsRegion()) { @@ -1644,7 +1620,7 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, event->setPrunable(false); } } - return event; + return std::move(event); } const char *const ConditionBRVisitor::GenericTrueMessage = @@ -1746,11 +1722,10 @@ LikelyFalsePositiveSuppressionBRVisitor::getEndPath(BugReporterContext &BRC, return nullptr; } -PathDiagnosticPiece * +std::shared_ptr UndefOrNullArgVisitor::VisitNode(const ExplodedNode *N, - 
const ExplodedNode *PrevN, - BugReporterContext &BRC, - BugReport &BR) { + const ExplodedNode *PrevN, + BugReporterContext &BRC, BugReport &BR) { ProgramStateRef State = N->getState(); ProgramPoint ProgLoc = N->getLocation(); @@ -1800,7 +1775,7 @@ UndefOrNullArgVisitor::VisitNode(const ExplodedNode *N, return nullptr; } -PathDiagnosticPiece * +std::shared_ptr CXXSelfAssignmentBRVisitor::VisitNode(const ExplodedNode *Succ, const ExplodedNode *Pred, BugReporterContext &BRC, BugReport &BR) { @@ -1847,8 +1822,8 @@ CXXSelfAssignmentBRVisitor::VisitNode(const ExplodedNode *Succ, Out << "Assuming " << Met->getParamDecl(0)->getName() << ((Param == This) ? " == " : " != ") << "*this"; - auto *Piece = new PathDiagnosticEventPiece(L, Out.str()); + auto Piece = std::make_shared(L, Out.str()); Piece->addRange(Met->getSourceRange()); - return Piece; + return std::move(Piece); } diff --git a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index f157c3dd6ce2..f0f6dd2e43e7 100644 --- a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -156,8 +156,8 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, unsigned TotalPieces = path.size(); unsigned TotalNotePieces = std::count_if(path.begin(), path.end(), - [](const IntrusiveRefCntPtr &p) { - return isa(p.get()); + [](const std::shared_ptr &p) { + return isa(*p); }); unsigned TotalRegularPieces = TotalPieces - TotalNotePieces; @@ -615,12 +615,13 @@ unsigned HTMLDiagnostics::ProcessMacroPiece(raw_ostream &os, I!=E; ++I) { if (const PathDiagnosticMacroPiece *MP = - dyn_cast(*I)) { + dyn_cast(I->get())) { num = ProcessMacroPiece(os, *MP, num); continue; } - if (PathDiagnosticEventPiece *EP = dyn_cast(*I)) { + if (PathDiagnosticEventPiece *EP = + dyn_cast(I->get())) { os << "
" "" diff --git a/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/lib/StaticAnalyzer/Core/PathDiagnostic.cpp index 5675cb2026f0..7c5ee3b25944 100644 --- a/lib/StaticAnalyzer/Core/PathDiagnostic.cpp +++ b/lib/StaticAnalyzer/Core/PathDiagnostic.cpp @@ -29,11 +29,10 @@ using namespace clang; using namespace ento; bool PathDiagnosticMacroPiece::containsEvent() const { - for (PathPieces::const_iterator I = subPieces.begin(), E = subPieces.end(); - I!=E; ++I) { - if (isa(*I)) + for (auto &P : subPieces) { + if (isa(*P)) return true; - if (PathDiagnosticMacroPiece *MP = dyn_cast(*I)) + if (auto *MP = dyn_cast(P.get())) if (MP->containsEvent()) return true; } @@ -64,33 +63,27 @@ PathDiagnosticNotePiece::~PathDiagnosticNotePiece() {} void PathPieces::flattenTo(PathPieces &Primary, PathPieces &Current, bool ShouldFlattenMacros) const { - for (PathPieces::const_iterator I = begin(), E = end(); I != E; ++I) { - PathDiagnosticPiece *Piece = I->get(); - + for (auto &Piece : *this) { switch (Piece->getKind()) { case PathDiagnosticPiece::Call: { - PathDiagnosticCallPiece *Call = cast(Piece); - IntrusiveRefCntPtr CallEnter = - Call->getCallEnterEvent(); - if (CallEnter) - Current.push_back(CallEnter); - Call->path.flattenTo(Primary, Primary, ShouldFlattenMacros); - IntrusiveRefCntPtr callExit = - Call->getCallExitEvent(); - if (callExit) - Current.push_back(callExit); + auto &Call = cast(*Piece); + if (auto CallEnter = Call.getCallEnterEvent()) + Current.push_back(std::move(CallEnter)); + Call.path.flattenTo(Primary, Primary, ShouldFlattenMacros); + if (auto callExit = Call.getCallExitEvent()) + Current.push_back(std::move(callExit)); break; } case PathDiagnosticPiece::Macro: { - PathDiagnosticMacroPiece *Macro = cast(Piece); + auto &Macro = cast(*Piece); if (ShouldFlattenMacros) { - Macro->subPieces.flattenTo(Primary, Primary, ShouldFlattenMacros); + Macro.subPieces.flattenTo(Primary, Primary, ShouldFlattenMacros); } else { Current.push_back(Piece); PathPieces NewPath; - Macro->subPieces.flattenTo(Primary, NewPath, ShouldFlattenMacros); + Macro.subPieces.flattenTo(Primary, NewPath, ShouldFlattenMacros); // FIXME: This probably shouldn't mutate the original path piece. - Macro->subPieces = NewPath; + Macro.subPieces = NewPath; } break; } @@ -143,7 +136,7 @@ getFirstStackedCallToHeaderFile(PathDiagnosticCallPiece *CP, // Check if the last piece in the callee path is a call to a function outside // of the main file. if (PathDiagnosticCallPiece *CPInner = - dyn_cast(Path.back())) { + dyn_cast(Path.back().get())) { return getFirstStackedCallToHeaderFile(CPInner, SMgr); } @@ -890,24 +883,26 @@ void PathDiagnosticLocation::flatten() { // Manipulation of PathDiagnosticCallPieces. 
//===----------------------------------------------------------------------===// -PathDiagnosticCallPiece * -PathDiagnosticCallPiece::construct(const ExplodedNode *N, - const CallExitEnd &CE, +std::shared_ptr +PathDiagnosticCallPiece::construct(const ExplodedNode *N, const CallExitEnd &CE, const SourceManager &SM) { const Decl *caller = CE.getLocationContext()->getDecl(); PathDiagnosticLocation pos = getLocationForCaller(CE.getCalleeContext(), CE.getLocationContext(), SM); - return new PathDiagnosticCallPiece(caller, pos); + return std::shared_ptr( + new PathDiagnosticCallPiece(caller, pos)); } PathDiagnosticCallPiece * PathDiagnosticCallPiece::construct(PathPieces &path, const Decl *caller) { - PathDiagnosticCallPiece *C = new PathDiagnosticCallPiece(path, caller); + std::shared_ptr C( + new PathDiagnosticCallPiece(path, caller)); path.clear(); - path.push_front(C); - return C; + auto *R = C.get(); + path.push_front(std::move(C)); + return R; } void PathDiagnosticCallPiece::setCallee(const CallEnter &CE, @@ -989,7 +984,7 @@ static bool describeCodeDecl(raw_ostream &Out, const Decl *D, return true; } -IntrusiveRefCntPtr +std::shared_ptr PathDiagnosticCallPiece::getCallEnterEvent() const { if (!Callee) return nullptr; @@ -1001,10 +996,10 @@ PathDiagnosticCallPiece::getCallEnterEvent() const { describeCodeDecl(Out, Callee, /*ExtendedDescription=*/true); assert(callEnter.asLocation().isValid()); - return new PathDiagnosticEventPiece(callEnter, Out.str()); + return std::make_shared(callEnter, Out.str()); } -IntrusiveRefCntPtr +std::shared_ptr PathDiagnosticCallPiece::getCallEnterWithinCallerEvent() const { if (!callEnterWithin.asLocation().isValid()) return nullptr; @@ -1020,10 +1015,10 @@ PathDiagnosticCallPiece::getCallEnterWithinCallerEvent() const { Out << "Entered call"; describeCodeDecl(Out, Caller, /*ExtendedDescription=*/false, " from "); - return new PathDiagnosticEventPiece(callEnterWithin, Out.str()); + return std::make_shared(callEnterWithin, Out.str()); } -IntrusiveRefCntPtr +std::shared_ptr PathDiagnosticCallPiece::getCallExitEvent() const { if (NoExit) return nullptr; @@ -1042,7 +1037,7 @@ PathDiagnosticCallPiece::getCallExitEvent() const { } assert(callReturn.asLocation().isValid()); - return new PathDiagnosticEventPiece(callReturn, Out.str()); + return std::make_shared(callReturn, Out.str()); } static void compute_path_size(const PathPieces &pieces, unsigned &size) { diff --git a/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp index c5263ee0e5ca..66812ed8ff5b 100644 --- a/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp +++ b/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -208,19 +208,14 @@ static void ReportCall(raw_ostream &o, unsigned indent, unsigned depth) { - IntrusiveRefCntPtr callEnter = - P.getCallEnterEvent(); - - if (callEnter) + if (auto callEnter = P.getCallEnterEvent()) ReportPiece(o, *callEnter, FM, SM, LangOpts, indent, depth, true, P.isLastInMainSourceFile()); - IntrusiveRefCntPtr callEnterWithinCaller = - P.getCallEnterWithinCallerEvent(); ++depth; - if (callEnterWithinCaller) + if (auto callEnterWithinCaller = P.getCallEnterWithinCallerEvent()) ReportPiece(o, *callEnterWithinCaller, FM, SM, LangOpts, indent, depth, true); @@ -229,10 +224,7 @@ static void ReportCall(raw_ostream &o, --depth; - IntrusiveRefCntPtr callExit = - P.getCallExitEvent(); - - if (callExit) + if (auto callExit = P.getCallExitEvent()) ReportPiece(o, *callExit, FM, SM, LangOpts, indent, depth, true); } @@ -299,10 +291,9 @@ void 
PlistDiagnostics::FlushDiagnosticsImpl( if (!Diags.empty()) SM = &Diags.front()->path.front()->getLocation().getManager(); - - auto AddPieceFID = [&FM, &Fids, SM](const PathDiagnosticPiece *Piece)->void { - AddFID(FM, Fids, *SM, Piece->getLocation().asLocation()); - ArrayRef<SourceRange> Ranges = Piece->getRanges(); + auto AddPieceFID = [&FM, &Fids, SM](const PathDiagnosticPiece &Piece) { + AddFID(FM, Fids, *SM, Piece.getLocation().asLocation()); + ArrayRef<SourceRange> Ranges = Piece.getRanges(); for (const SourceRange &Range : Ranges) { AddFID(FM, Fids, *SM, Range.getBegin()); AddFID(FM, Fids, *SM, Range.getEnd()); @@ -318,23 +309,20 @@ void PlistDiagnostics::FlushDiagnosticsImpl( const PathPieces &Path = *WorkList.pop_back_val(); for (const auto &Iter : Path) { - const PathDiagnosticPiece *Piece = Iter.get(); + const PathDiagnosticPiece &Piece = *Iter; AddPieceFID(Piece); if (const PathDiagnosticCallPiece *Call = - dyn_cast<PathDiagnosticCallPiece>(Piece)) { - if (IntrusiveRefCntPtr<PathDiagnosticEventPiece> - CallEnterWithin = Call->getCallEnterWithinCallerEvent()) - AddPieceFID(CallEnterWithin.get()); + dyn_cast<PathDiagnosticCallPiece>(&Piece)) { + if (auto CallEnterWithin = Call->getCallEnterWithinCallerEvent()) + AddPieceFID(*CallEnterWithin); - if (IntrusiveRefCntPtr<PathDiagnosticEventPiece> - CallEnterEvent = Call->getCallEnterEvent()) - AddPieceFID(CallEnterEvent.get()); + if (auto CallEnterEvent = Call->getCallEnterEvent()) + AddPieceFID(*CallEnterEvent); WorkList.push_back(&Call->path); - } - else if (const PathDiagnosticMacroPiece *Macro = - dyn_cast<PathDiagnosticMacroPiece>(Piece)) { + } else if (const PathDiagnosticMacroPiece *Macro = + dyn_cast<PathDiagnosticMacroPiece>(&Piece)) { WorkList.push_back(&Macro->subPieces); } } diff --git a/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp b/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp index 31b6638e651f..6792f89876cd 100644 --- a/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp +++ b/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp @@ -116,7 +116,7 @@ ento::createCheckerManager(AnalyzerOptions &opts, const LangOptions &langOpts, ArrayRef<std::string> plugins, DiagnosticsEngine &diags) { std::unique_ptr<CheckerManager> checkerMgr( - new CheckerManager(langOpts, &opts)); + new CheckerManager(langOpts, opts)); SmallVector<CheckerOptInfo, 8> checkerOpts = getCheckerOptList(opts); diff --git a/lib/StaticAnalyzer/Frontend/ModelInjector.cpp b/lib/StaticAnalyzer/Frontend/ModelInjector.cpp index 0a284851b08d..c6f3baa7e3b2 100644 --- a/lib/StaticAnalyzer/Frontend/ModelInjector.cpp +++ b/lib/StaticAnalyzer/Frontend/ModelInjector.cpp @@ -62,8 +62,7 @@ void ModelInjector::onBodySynthesis(const NamedDecl *D) { return; } - IntrusiveRefCntPtr<CompilerInvocation> Invocation( - new CompilerInvocation(CI.getInvocation())); + auto Invocation = std::make_shared<CompilerInvocation>(CI.getInvocation()); FrontendOptions &FrontendOpts = Invocation->getFrontendOpts(); InputKind IK = IK_CXX; // FIXME @@ -76,7 +75,7 @@ void ModelInjector::onBodySynthesis(const NamedDecl *D) { // Modules are parsed by a separate CompilerInstance, so this code mimics that // behavior for models CompilerInstance Instance(CI.getPCHContainerOperations()); - Instance.setInvocation(&*Invocation); + Instance.setInvocation(std::move(Invocation)); Instance.createDiagnostics( new ForwardingDiagnosticConsumer(CI.getDiagnosticClient()), /*ShouldOwnClient=*/true); @@ -89,7 +88,7 @@ void ModelInjector::onBodySynthesis(const NamedDecl *D) { // is set to true to avoid double free issues Instance.setFileManager(&CI.getFileManager()); Instance.setSourceManager(&SM); - Instance.setPreprocessor(&CI.getPreprocessor()); + Instance.setPreprocessor(CI.getPreprocessorPtr()); Instance.setASTContext(&CI.getASTContext());
Instance.getPreprocessor().InitializeForModelFile(); diff --git a/lib/Tooling/Tooling.cpp b/lib/Tooling/Tooling.cpp index 529c47ef1e7a..25cee98078f3 100644 --- a/lib/Tooling/Tooling.cpp +++ b/lib/Tooling/Tooling.cpp @@ -275,13 +275,13 @@ bool ToolInvocation::run() { Invocation->getPreprocessorOpts().addRemappedFile(It.getKey(), Input.release()); } - return runInvocation(BinaryName, Compilation.get(), Invocation.release(), + return runInvocation(BinaryName, Compilation.get(), std::move(Invocation), std::move(PCHContainerOps)); } bool ToolInvocation::runInvocation( const char *BinaryName, clang::driver::Compilation *Compilation, - clang::CompilerInvocation *Invocation, + std::shared_ptr<clang::CompilerInvocation> Invocation, std::shared_ptr<PCHContainerOperations> PCHContainerOps) { // Show the invocation, with -v. if (Invocation->getHeaderSearchOpts().Verbose) { @@ -290,17 +290,17 @@ bool ToolInvocation::runInvocation( llvm::errs() << "\n"; } - return Action->runInvocation(Invocation, Files, std::move(PCHContainerOps), - DiagConsumer); + return Action->runInvocation(std::move(Invocation), Files, + std::move(PCHContainerOps), DiagConsumer); } bool FrontendActionFactory::runInvocation( - CompilerInvocation *Invocation, FileManager *Files, + std::shared_ptr<CompilerInvocation> Invocation, FileManager *Files, std::shared_ptr<PCHContainerOperations> PCHContainerOps, DiagnosticConsumer *DiagConsumer) { // Create a compiler instance to handle the actual work. clang::CompilerInstance Compiler(std::move(PCHContainerOps)); - Compiler.setInvocation(Invocation); + Compiler.setInvocation(std::move(Invocation)); Compiler.setFileManager(Files); // The FrontendAction can have lifetime requirements for Compiler or its @@ -474,7 +474,8 @@ class ASTBuilderAction : public ToolAction { public: ASTBuilderAction(std::vector<std::unique_ptr<ASTUnit>> &ASTs) : ASTs(ASTs) {} - bool runInvocation(CompilerInvocation *Invocation, FileManager *Files, + bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation, + FileManager *Files, std::shared_ptr<PCHContainerOperations> PCHContainerOps, DiagnosticConsumer *DiagConsumer) override { std::unique_ptr<ASTUnit> AST = ASTUnit::LoadFromCompilerInvocation( diff --git a/test/CodeGen/builtins-ppc-error.c b/test/CodeGen/builtins-ppc-error.c new file mode 100644 index 000000000000..5860c4f9e77e --- /dev/null +++ b/test/CodeGen/builtins-ppc-error.c @@ -0,0 +1,20 @@ +// REQUIRES: powerpc-registered-target + +// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \ +// RUN: -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -Wall -Werror -verify %s + +// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \ +// RUN: -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -Wall -Werror -verify %s + +#include <altivec.h> + +extern vector signed int vsi; +extern vector unsigned char vuc; + +void testInsertWord1(void) { + int index = 5; + vector unsigned char v1 = vec_insert4b(vsi, vuc, index); // expected-error {{argument to '__builtin_vsx_insertword' must be a constant integer}} + vector unsigned long long v2 = vec_extract4b(vuc, index); // expected-error {{argument to '__builtin_vsx_extractuword' must be a constant integer}} +} diff --git a/test/CodeGen/builtins-ppc-p9vector.c b/test/CodeGen/builtins-ppc-p9vector.c index f70d2f9f1504..bd0ad182f15f 100644 --- a/test/CodeGen/builtins-ppc-p9vector.c +++ b/test/CodeGen/builtins-ppc-p9vector.c @@ -1166,17 +1166,52 @@ vector float test114(void) { // CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-BE-NEXT: ret <4 x float> -// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> -// CHECK-LE:
@llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) -// CHECK-LE-NEXT: ret <4 x float> +// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-NEXT: ret <4 x float> return vec_extract_fp32_from_shorth(vusa); } vector float test115(void) { // CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-BE-NEXT: ret <4 x float> -// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> -// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) -// CHECK-LE-NEXT: ret <4 x float> +// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-NEXT: ret <4 x float> return vec_extract_fp32_from_shortl(vusa); } +vector unsigned char test116(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-NEXT: [[T2:%.+]] = bitcast <2 x i64> [[T1]] to <4 x i32> +// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 5) +// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8> + return vec_insert4b(vuia, vuca, 7); +} +vector unsigned char test117(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 12) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-NEXT: [[T2:%.+]] = bitcast <2 x i64> [[T1]] to <4 x i32> +// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 0) +// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8> + return vec_insert4b(vuia, vuca, 13); +} +vector unsigned long long test118(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1) +// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> +// CHECK-NEXT: ret <2 x i64> + return vec_extract4b(vuca, 11); +} +vector unsigned long long test119(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 0) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 12) +// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> +// CHECK-NEXT: ret <2 x i64> + return vec_extract4b(vuca, -5); +} + diff --git a/test/CodeGen/catch-undef-behavior.c b/test/CodeGen/catch-undef-behavior.c index c2f01ae1a66f..d7a26f8a7d4b 100644 --- a/test/CodeGen/catch-undef-behavior.c +++ b/test/CodeGen/catch-undef-behavior.c @@ -6,16 +6,16 @@ // CHECK-UBSAN: @[[INT:.*]] = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" } // FIXME: When we only emit each type once, use [[INT]] more below. 
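The switch in the checks below from @__ubsan_handle_type_mismatch to @__ubsan_handle_type_mismatch_v1 also changes the layout of the static check data: the v1 handler receives the alignment as log2(alignment) packed into a single i8 rather than as a pointer-sized integer, so "i64 4, i8 1" becomes "i8 2, i8 1" (log2(4) = 2; the trailing byte is the type-check kind). A minimal C++ sketch of that encoding, assuming a power-of-two alignment; the function name is illustrative, not part of the UBSan runtime:

#include <cstdint>

// Illustrative only: the log2-encoded alignment byte that the
// type_mismatch_v1 data carries for a power-of-two alignment.
constexpr uint8_t logAlignment(uint64_t Align) {
  uint8_t Log2 = 0;
  while (Align > 1) {
    Align >>= 1; // halve until we reach 1, counting the steps
    ++Log2;
  }
  return Log2; // logAlignment(4) == 2, matching "i8 2" in the checks below
}

static_assert(logAlignment(4) == 2, "4-byte alignment encodes as 2");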
-// CHECK-UBSAN: @[[LINE_100:.*]] = private unnamed_addr global {{.*}}, i32 100, i32 5 {{.*}} @[[INT]], i64 4, i8 1 -// CHECK-UBSAN: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 10 {{.*}}, i64 4, i8 0 +// CHECK-UBSAN: @[[LINE_100:.*]] = private unnamed_addr global {{.*}}, i32 100, i32 5 {{.*}} @[[INT]], i8 2, i8 1 +// CHECK-UBSAN: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 10 {{.*}}, i8 2, i8 0 // CHECK-UBSAN: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 12 {{.*}} @{{.*}}, {{.*}} @{{.*}} // CHECK-UBSAN: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 12 {{.*}} @{{.*}}, {{.*}} @{{.*}} -// CHECK-UBSAN: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 10 {{.*}} @{{.*}}, i64 4, i8 0 } -// CHECK-UBSAN: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 3 {{.*}} @{{.*}}, i64 4, i8 1 } +// CHECK-UBSAN: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 10 {{.*}} @{{.*}}, i8 2, i8 0 } +// CHECK-UBSAN: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 3 {{.*}} @{{.*}}, i8 2, i8 1 } // CHECK-UBSAN: @[[STRUCT_S:.*]] = private unnamed_addr constant { i16, i16, [11 x i8] } { i16 -1, i16 0, [11 x i8] c"'struct S'\00" } -// CHECK-UBSAN: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 14 {{.*}} @[[STRUCT_S]], i64 4, i8 3 } +// CHECK-UBSAN: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 14 {{.*}} @[[STRUCT_S]], i8 2, i8 3 } // CHECK-UBSAN: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 12 {{.*}} @{{.*}} } // CHECK-UBSAN: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 11 {{.*}} @{{.*}} } // CHECK-UBSAN: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 10 {{.*}} @{{.*}} } @@ -54,7 +54,7 @@ void foo() { // CHECK-TRAP: br i1 %[[OK]], {{.*}} // CHECK-UBSAN: %[[ARG:.*]] = ptrtoint {{.*}} %[[PTR]] to i64 - // CHECK-UBSAN-NEXT: call void @__ubsan_handle_type_mismatch(i8* bitcast ({{.*}} @[[LINE_100]] to i8*), i64 %[[ARG]]) + // CHECK-UBSAN-NEXT: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_100]] to i8*), i64 %[[ARG]]) // CHECK-TRAP: call void @llvm.trap() [[NR_NUW:#[0-9]+]] // CHECK-TRAP-NEXT: unreachable @@ -62,7 +62,7 @@ void foo() { // With -fsanitize=null, only perform the null check. 
// CHECK-NULL: %[[NULL:.*]] = icmp ne {{.*}}, null // CHECK-NULL: br i1 %[[NULL]] - // CHECK-NULL: call void @__ubsan_handle_type_mismatch(i8* bitcast ({{.*}} @[[LINE_100]] to i8*), i64 %{{.*}}) + // CHECK-NULL: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_100]] to i8*), i64 %{{.*}}) #line 100 u.i=1; } @@ -77,7 +77,7 @@ int bar(int *a) { // CHECK-COMMON-NEXT: icmp eq i64 %[[MISALIGN]], 0 // CHECK-UBSAN: %[[ARG:.*]] = ptrtoint - // CHECK-UBSAN-NEXT: call void @__ubsan_handle_type_mismatch(i8* bitcast ({{.*}} @[[LINE_200]] to i8*), i64 %[[ARG]]) + // CHECK-UBSAN-NEXT: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_200]] to i8*), i64 %[[ARG]]) // CHECK-TRAP: call void @llvm.trap() [[NR_NUW]] // CHECK-TRAP-NEXT: unreachable @@ -145,7 +145,7 @@ int rsh_inbounds(int a, int b) { // CHECK-COMMON-LABEL: @load int load(int *p) { - // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch(i8* bitcast ({{.*}} @[[LINE_500]] to i8*), i64 %{{.*}}) + // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_500]] to i8*), i64 %{{.*}}) // CHECK-TRAP: call void @llvm.trap() [[NR_NUW]] // CHECK-TRAP-NEXT: unreachable @@ -155,7 +155,7 @@ int load(int *p) { // CHECK-COMMON-LABEL: @store void store(int *p, int q) { - // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch(i8* bitcast ({{.*}} @[[LINE_600]] to i8*), i64 %{{.*}}) + // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_600]] to i8*), i64 %{{.*}}) // CHECK-TRAP: call void @llvm.trap() [[NR_NUW]] // CHECK-TRAP-NEXT: unreachable @@ -167,7 +167,7 @@ struct S { int k; }; // CHECK-COMMON-LABEL: @member_access int *member_access(struct S *p) { - // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch(i8* bitcast ({{.*}} @[[LINE_700]] to i8*), i64 %{{.*}}) + // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({{.*}} @[[LINE_700]] to i8*), i64 %{{.*}}) // CHECK-TRAP: call void @llvm.trap() [[NR_NUW]] // CHECK-TRAP-NEXT: unreachable diff --git a/test/CodeGen/sanitize-recover.c b/test/CodeGen/sanitize-recover.c index b263f5163181..dd8734e971eb 100644 --- a/test/CodeGen/sanitize-recover.c +++ b/test/CodeGen/sanitize-recover.c @@ -33,7 +33,7 @@ void foo() { // PARTIAL: br i1 %[[CHECK012]], {{.*}} !prof ![[WEIGHT_MD:.*]], !nosanitize // PARTIAL: br i1 %[[CHECK02]], {{.*}} - // PARTIAL: call void @__ubsan_handle_type_mismatch_abort( + // PARTIAL: call void @__ubsan_handle_type_mismatch_v1_abort( // PARTIAL-NEXT: unreachable - // PARTIAL: call void @__ubsan_handle_type_mismatch( + // PARTIAL: call void @__ubsan_handle_type_mismatch_v1( } diff --git a/test/CodeGen/vectorcall.c b/test/CodeGen/vectorcall.c index b38d5e5fbc5b..167f72ca2cfd 100644 --- a/test/CodeGen/vectorcall.c +++ b/test/CodeGen/vectorcall.c @@ -1,22 +1,22 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64 +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X32 +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64 void __vectorcall v1(int a, int b) {} -// CHECK: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b) +// X32: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b) // X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b) void __vectorcall v2(char a, char b) {} -// CHECK: define 
x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b) +// X32: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b) // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b) struct Small { int x; }; void __vectorcall v3(int a, struct Small b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) +// X32: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c) struct Large { int a[5]; }; void __vectorcall v4(int a, struct Large b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c) +// X32: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c) // X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c) struct HFA2 { double x, y; }; @@ -24,54 +24,84 @@ struct HFA4 { double w, x, y, z; }; struct HFA5 { double v, w, x, y, z; }; void __vectorcall hfa1(int a, struct HFA4 b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c) -// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c) +// X32: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c) +// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c) // HFAs that would require more than six total SSE registers are passed // indirectly. Additional vector arguments can consume the rest of the SSE // registers. void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {} -// CHECK: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* inreg %b, double %c) -// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* %b, double %c) +// X32: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c) +// X64: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c) // Ensure that we pass builtin types directly while counting them against the // SSE register usage. void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {} -// CHECK: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f) +// X32: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f) // X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f) // Aggregates with more than four elements are not HFAs and are passed byval. // Because they are not classified as homogeneous, they don't get special // handling to ensure alignment. void __vectorcall hfa4(struct HFA5 a) {} -// CHECK: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4) +// X32: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4) // X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a) // Return HFAs of 4 or fewer elements in registers. 
static struct HFA2 g_hfa2; struct HFA2 __vectorcall hfa5(void) { return g_hfa2; } -// CHECK: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"() +// X32: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"() // X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"() typedef float __attribute__((vector_size(16))) v4f32; struct HVA2 { v4f32 x, y; }; +struct HVA3 { v4f32 w, x, y; }; struct HVA4 { v4f32 w, x, y, z; }; +struct HVA5 { v4f32 w, x, y, z, p; }; -void __vectorcall hva1(int a, struct HVA4 b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01hva1@@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c) -// X64: define x86_vectorcallcc void @"\01hva1@@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c) +v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;} +// X32: define x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c) +// X64: define x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c) -void __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {} -// CHECK: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* inreg %b, <4 x float> %c) -// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* %b, <4 x float> %c) +v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;} +// X32: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c) +// X64: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c) -void __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {} -// CHECK: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f) -// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) +v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;} +// X32: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f) +// X64: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) + +// vector types have higher priority than HVA structures, so vector types are allocated first +// and HVAs are allocated if enough registers are available v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;} +// X32: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c) +// X64: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c) + +v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;} +// X32: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce) +// X64: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
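The comment above hva4 states the allocation rule these functions pin down: plain vector arguments claim XMM registers on the first pass, and HVAs only receive registers on a second pass if enough of the six slots remain; otherwise they are passed indirectly. A rough C++ model of that two-pass scheme; the types and the simplified register count are assumptions for illustration, not the actual TargetInfo.cpp logic:

#include <vector>

// Simplified vectorcall model: pass 1 assigns XMM0-XMM5 to plain vector
// arguments, pass 2 hands leftover registers to HVAs element by element.
// An HVA that does not fit in the remaining registers goes indirect.
enum class Kind { Vector, HVA };
struct Arg { Kind K; int Elems; bool InRegs = false; };

void allocateRegisters(std::vector<Arg> &Args) {
  int Free = 6; // six SSE registers are available to vectorcall
  for (Arg &A : Args) // first pass: plain vectors
    if (A.K == Kind::Vector && Free > 0) { A.InRegs = true; --Free; }
  for (Arg &A : Args) // second pass: HVAs need one register per element
    if (A.K == Kind::HVA && A.Elems <= Free) { A.InRegs = true; Free -= A.Elems; }
}

Tracing hva4 above through this model: c takes one register on the first pass; on the second pass the four-element HVA4 a still fits in the five remaining registers, but the two-element HVA2 b no longer does, which is why b is passed indirectly in both CHECK lines.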
+ +struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;} +// X32: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b) +// X64: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b) + +struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;} +// X32: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result) +// X64: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result) + +v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;} +// X32: define x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f) +// X64: define x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) typedef float __attribute__((ext_vector_type(3))) v3f32; struct OddSizeHVA { v3f32 x, y; }; void __vectorcall odd_size_hva(struct OddSizeHVA a) {} -// CHECK: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1) -// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1) +// X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce) +// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce) + +// The Vectorcall ABI only allows passing the first 6 items in registers, so this shouldn't +// consider 'p7' as a register. Instead p5 gets put into the register on the second pass. +struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7){ return p1;} +// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@80"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7) +// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@96"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7) diff --git a/test/CodeGenCXX/dllexport.cpp b/test/CodeGenCXX/dllexport.cpp index eb9ca79b7b40..116176e2cb92 100644 --- a/test/CodeGenCXX/dllexport.cpp +++ b/test/CodeGenCXX/dllexport.cpp @@ -515,6 +515,18 @@ struct __declspec(dllexport) ClassWithClosure { // M32-DAG: ret void }; +template <typename T> struct TemplateWithClosure { + TemplateWithClosure(int x = sizeof(T)) {} +}; +extern template struct TemplateWithClosure<char>; +template struct __declspec(dllexport) TemplateWithClosure<char>; +extern template struct TemplateWithClosure<int>; +template struct __declspec(dllexport) TemplateWithClosure<int>; +// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$TemplateWithClosure@D@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// M32-DAG: call {{.*}} @"\01??0?$TemplateWithClosure@D@@QAE@H@Z"({{.*}}, i32 1) +// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$TemplateWithClosure@H@@QAEXXZ"({{.*}}) {{#[0-9]+}} comdat +// M32-DAG: call {{.*}} @"\01??0?$TemplateWithClosure@H@@QAE@H@Z"({{.*}}, i32 4) + struct __declspec(dllexport) NestedOuter { DELETE_IMPLICIT_MEMBERS(NestedOuter); NestedOuter(void *p = 0) {} diff --git a/test/CodeGenCXX/homogeneous-aggregates.cpp b/test/CodeGenCXX/homogeneous-aggregates.cpp index 67911c0d7f90..1338b25e21ae 100644 --- a/test/CodeGenCXX/homogeneous-aggregates.cpp +++
b/test/CodeGenCXX/homogeneous-aggregates.cpp @@ -47,7 +47,7 @@ D1 CC func_D1(D1 x) { return x; } // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce) // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce) // ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce) -// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2) +// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce) D2 CC func_D2(D2 x) { return x; } // PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce) @@ -92,7 +92,7 @@ struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; }; void CC with_empty_base(HVAWithEmptyBase a) {} // FIXME: MSVC doesn't consider this an HVA because of the empty base. -// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(float %a.0, float %a.1, float %a.2) +// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(%struct.HVAWithEmptyBase inreg %a.coerce) struct HVAWithEmptyBitField : Float1, Float2 { int : 0; // Takes no space. @@ -102,5 +102,5 @@ struct HVAWithEmptyBitField : Float1, Float2 { // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce) // ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce) // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce) -// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2) +// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce) void CC with_empty_bitfield(HVAWithEmptyBitField a) {} diff --git a/test/CodeGenCXX/ubsan-vtable-checks.cpp b/test/CodeGenCXX/ubsan-vtable-checks.cpp index 80af77d4ea6d..e684ae9180f1 100644 --- a/test/CodeGenCXX/ubsan-vtable-checks.cpp +++ b/test/CodeGenCXX/ubsan-vtable-checks.cpp @@ -21,7 +21,7 @@ int get_v(T* t) { // CHECK-NULL-NOT: load {{.*}} (%struct.T*{{.*}})**, {{.*}} (%struct.T*{{.*}})*** // CHECK-NULL: [[UBSAN_CMP_RES:%[0-9]+]] = icmp ne %struct.T* %{{[_a-z0-9]+}}, null // CHECK-NULL-NEXT: br i1 [[UBSAN_CMP_RES]], label %{{.*}}, label %{{.*}} - // CHECK-NULL: call void @__ubsan_handle_type_mismatch_abort + // CHECK-NULL: call void @__ubsan_handle_type_mismatch_v1_abort // Second, we check that vtable is actually loaded once the type check is done. 
// CHECK-NULL: load {{.*}} (%struct.T*{{.*}})**, {{.*}} (%struct.T*{{.*}})*** return t->v(); diff --git a/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/.keep b/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/include/.keep b/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/include/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/lib/.keep b/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/lib/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_30.10.bc b/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_30.10.bc new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_35.10.bc b/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/nvvm/libdevice/libdevice.compute_35.10.bc new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/Driver/avr-toolchain.c b/test/Driver/avr-toolchain.c new file mode 100644 index 000000000000..46a3c10fa3a1 --- /dev/null +++ b/test/Driver/avr-toolchain.c @@ -0,0 +1,4 @@ +// A basic clang -cc1 command-line. + +// RUN: %clang %s -### -no-canonical-prefixes -target avr 2>&1 | FileCheck -check-prefix=CC1 %s +// CC1: clang{{.*}} "-cc1" "-triple" "avr" diff --git a/test/Driver/cuda-version-check.cu b/test/Driver/cuda-version-check.cu index cb2ac7994f75..46ca72f2ea0c 100644 --- a/test/Driver/cuda-version-check.cu +++ b/test/Driver/cuda-version-check.cu @@ -2,40 +2,40 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang -v -### --cuda-gpu-arch=sm_20 --sysroot=%S/Inputs/CUDA 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --sysroot=%S/Inputs/CUDA 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK -// RUN: %clang -v -### --cuda-gpu-arch=sm_20 --sysroot=%S/Inputs/CUDA_80 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --sysroot=%S/Inputs/CUDA_80 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK -// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA_80 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA_80 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK // The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60. -// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // This should only complain about sm_60, not sm_35. 
-// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \ // RUN: --sysroot=%S/Inputs/CUDA 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35 // We should get two errors here, one for sm_60 and one for sm_61. -// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \ // RUN: --sysroot=%S/Inputs/CUDA 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61 // We should still get an error if we pass -nocudainc, because this compilation // would invoke ptxas, and we do a version check on that, too. -// RUN: %clang -v -### --cuda-gpu-arch=sm_60 -nocudainc --sysroot=%S/Inputs/CUDA 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --sysroot=%S/Inputs/CUDA 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // If with -nocudainc and -E, we don't touch the CUDA install, so we // shouldn't get an error. -// RUN: %clang -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \ +// RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \ // RUN: --sysroot=%S/Inputs/CUDA 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK // --no-cuda-version-check should suppress all of these errors. -// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA 2>&1 \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --sysroot=%S/Inputs/CUDA 2>&1 \ // RUN: --no-cuda-version-check %s | \ // RUN: FileCheck %s --check-prefix=OK @@ -43,9 +43,9 @@ // therefore we should not get an error in host-only mode. We use the -S here // to avoid the error being produced in case by the assembler tool, which does // the same check. 
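The comments around these RUN lines describe when the driver actually validates the CUDA installation against the requested GPU arch: whenever device-side compilation or ptxas would run, and never with --no-cuda-version-check. A hedged C++ sketch of that gate for the RUN lines that follow; the names and the version threshold are illustrative assumptions, not the Driver's real data structures:

// Illustration of the version gate these RUN lines exercise: sm_60 and
// newer need CUDA 8.0, which the CUDA 7.0 tree under Inputs/CUDA lacks.
struct CudaInstallation { int Major = 7, Minor = 0; };

bool archIsSupported(const CudaInstallation &Install, int SmArch,
                     bool VersionCheckDisabled) {
  if (VersionCheckDisabled)
    return true;               // --no-cuda-version-check suppresses the error
  if (SmArch >= 60)
    return Install.Major >= 8; // Pascal (sm_60/sm_61) arrived with CUDA 8.0
  return true;                 // older arches are fine on CUDA 7.0
}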
-// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --sysroot=%S/Inputs/CUDA -S 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --sysroot=%S/Inputs/CUDA -S 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK -// RUN: %clang -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --sysroot=%S/Inputs/CUDA -S 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --sysroot=%S/Inputs/CUDA -S 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // OK-NOT: error: GPU arch diff --git a/test/Driver/cuda-windows.cu b/test/Driver/cuda-windows.cu new file mode 100644 index 000000000000..1d67710647c0 --- /dev/null +++ b/test/Driver/cuda-windows.cu @@ -0,0 +1,14 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target +// +// RUN: %clang -v --target=i386-pc-windows-msvc \ +// RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s +// RUN: %clang -v --target=i386-pc-windows-mingw32 \ +// RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s + +// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0 +// CHECK: "-cc1" "-triple" "nvptx-nvidia-cuda" +// CHECK-SAME: "-fms-extensions" +// CHECK-SAME: "-fms-compatibility" +// CHECK-SAME: "-fms-compatibility-version= diff --git a/test/Index/complete-block-properties.m b/test/Index/complete-block-properties.m index d166147294e1..4697703c8e5c 100644 --- a/test/Index/complete-block-properties.m +++ b/test/Index/complete-block-properties.m @@ -43,7 +43,7 @@ typedef int (^BarBlock)(int *); //CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType void}{TypedText block}{LeftParen (}{RightParen )} (35) //CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType void (^)()}{TypedText block}{Equal = }{Placeholder ^(void)} (38) //CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType Foo}{TypedText blocker}{LeftParen (}{Placeholder int x}{Comma , }{Placeholder Foo y}{Comma , }{Placeholder ^(Foo *someParameter)foo}{RightParen )} (35) -//CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType Foo (^)(int, Foo, FooBlock)}{TypedText blocker}{Equal = }{Placeholder ^Foo(int x, Foo y, FooBlock foo)} (38) +//CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType Foo (^)(int, Foo, FooBlock)}{TypedText blocker}{Equal = }{Placeholder ^Foo(int x, Foo y, FooBlock foo)} (32) //CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType int}{TypedText foo} (35) //CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType void}{TypedText fooBlock}{LeftParen (}{Placeholder Foo *someParameter}{RightParen )} (35) //CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType Test *}{TypedText getObject}{LeftParen (}{Placeholder int index}{RightParen )} (35) diff --git a/test/Index/complete-block-property-assignment.m b/test/Index/complete-block-property-assignment.m index ced3b7fa1302..908e18629528 100644 --- a/test/Index/complete-block-property-assignment.m +++ b/test/Index/complete-block-property-assignment.m @@ -15,6 +15,7 @@ typedef void (^FooBlock)(Foo *someParameter); @interface Test : Obj @property (readwrite, nonatomic, copy) FooBlock onEventHandler; @property (readonly, nonatomic, copy) void (^onReadonly)(int *someParameter); +@property (readwrite, nonatomic, copy) int (^processEvent)(int eventCode); @property (readonly, nonatomic, strong) Obj *obj; @end @@ -29,10 +30,10 @@ typedef void (^FooBlock)(Foo *someParameter); SELFY.foo = 2 } -// RUN: c-index-test -code-completion-at=%s:26:8 %s | FileCheck -check-prefix=CHECK-CC1 %s -// RUN: c-index-test 
-code-completion-at=%s:27:27 %s | FileCheck -check-prefix=CHECK-CC1 %s -// RUN: c-index-test -code-completion-at=%s:28:22 %s | FileCheck -check-prefix=CHECK-CC1 %s -// RUN: c-index-test -code-completion-at=%s:29:9 %s | FileCheck -check-prefix=CHECK-CC1 %s +// RUN: c-index-test -code-completion-at=%s:27:8 %s | FileCheck -check-prefix=CHECK-CC1 %s +// RUN: c-index-test -code-completion-at=%s:28:27 %s | FileCheck -check-prefix=CHECK-CC1 %s +// RUN: c-index-test -code-completion-at=%s:29:22 %s | FileCheck -check-prefix=CHECK-CC1 %s +// RUN: c-index-test -code-completion-at=%s:30:9 %s | FileCheck -check-prefix=CHECK-CC1 %s // CHECK-CC1: ObjCPropertyDecl:{ResultType int}{TypedText foo} (35) // CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType Obj *}{TypedText obj} (35) // CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType void}{TypedText onAction}{LeftParen (}{Placeholder Obj *object}{RightParen )} (35) @@ -40,6 +41,8 @@ typedef void (^FooBlock)(Foo *someParameter); // CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType void}{TypedText onEventHandler}{LeftParen (}{Placeholder Foo *someParameter}{RightParen )} (35) // CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType FooBlock}{TypedText onEventHandler}{Equal = }{Placeholder ^(Foo *someParameter)} (38) // CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType void}{TypedText onReadonly}{LeftParen (}{Placeholder int *someParameter}{RightParen )} (35) +// CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType int}{TypedText processEvent}{LeftParen (}{Placeholder int eventCode}{RightParen )} (35) +// CHECK-CC1-NEXT: ObjCPropertyDecl:{ResultType int (^)(int)}{TypedText processEvent}{Equal = }{Placeholder ^int(int eventCode)} (32) - (void) takeInt:(int)x { } @@ -53,16 +56,17 @@ typedef void (^FooBlock)(Foo *someParameter); return self.foo; } -// RUN: c-index-test -code-completion-at=%s:47:9 %s | FileCheck -check-prefix=CHECK-NO %s -// RUN: c-index-test -code-completion-at=%s:48:16 %s | FileCheck -check-prefix=CHECK-NO %s -// RUN: c-index-test -code-completion-at=%s:49:23 %s | FileCheck -check-prefix=CHECK-NO %s -// RUN: c-index-test -code-completion-at=%s:50:12 %s | FileCheck -check-prefix=CHECK-NO %s -// RUN: c-index-test -code-completion-at=%s:51:15 %s | FileCheck -check-prefix=CHECK-NO %s -// RUN: c-index-test -code-completion-at=%s:53:15 %s | FileCheck -check-prefix=CHECK-NO %s +// RUN: c-index-test -code-completion-at=%s:50:9 %s | FileCheck -check-prefix=CHECK-NO %s +// RUN: c-index-test -code-completion-at=%s:51:16 %s | FileCheck -check-prefix=CHECK-NO %s +// RUN: c-index-test -code-completion-at=%s:52:23 %s | FileCheck -check-prefix=CHECK-NO %s +// RUN: c-index-test -code-completion-at=%s:53:12 %s | FileCheck -check-prefix=CHECK-NO %s +// RUN: c-index-test -code-completion-at=%s:54:15 %s | FileCheck -check-prefix=CHECK-NO %s +// RUN: c-index-test -code-completion-at=%s:56:15 %s | FileCheck -check-prefix=CHECK-NO %s // CHECK-NO: ObjCPropertyDecl:{ResultType int}{TypedText foo} (35) // CHECK-NO-NEXT: ObjCPropertyDecl:{ResultType Obj *}{TypedText obj} (35) // CHECK-NO-NEXT: ObjCPropertyDecl:{ResultType void (^)(Obj *)}{TypedText onAction} (35) // CHECK-NO-NEXT: ObjCPropertyDecl:{ResultType FooBlock}{TypedText onEventHandler} (35) // CHECK-NO-NEXT: ObjCPropertyDecl:{ResultType void (^)(int *)}{TypedText onReadonly} (35) +// CHECK-NO-NEXT: ObjCPropertyDecl:{ResultType int (^)(int)}{TypedText processEvent} (35) @end diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp index 287089d7c45e..59c4d5b277ce 100644 --- a/test/OpenMP/nvptx_target_codegen.cpp +++ 
b/test/OpenMP/nvptx_target_codegen.cpp @@ -8,9 +8,6 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: [[OMP_NT:@.+]] = common addrspace(3) global i32 0 -// CHECK-DAG: [[OMP_WID:@.+]] = common addrspace(3) global i64 0 - template<typename tx, typename ty> struct TT{ tx X; ty Y; }; @@ -26,19 +23,22 @@ int foo(int n) { double cn[5][n]; TT<long long, char> d; - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l87}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l90}}_worker() + // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, + // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, + // CHECK: store i8* null, i8** [[OMP_WORK_FN]], + // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]], - // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0 + // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], + // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]] // // CHECK: [[SEL_WORKERS]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]] - // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]] + // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]], + // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0 // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]] // // CHECK: [[EXEC_PARALLEL]] @@ -54,31 +54,34 @@ int foo(int n) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l87]]() - // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[A:%.+]] = sub i32 [[WS]], 1 - // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1 - // CHECK: [[MID:%.+]] = and i32 [[B]], - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]] - // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]] - // - // CHECK: [[CHECK_WORKER]] - // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]] - // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]] + // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l90]]() + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // // CHECK: [[WORKER]] // CHECK: {{call|invoke}} void [[T1]]_worker() - // CHECK: br label {{%?}}[[EXIT]] + // CHECK: br label {{%?}}[[EXIT:.+]] // - // CHECK: [[MASTER]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]]) - // CHECK: br label {{%?}}[[TERM:.+]] + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1
[[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] // - // CHECK: [[TERM]] - // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]], + // CHECK: [[MASTER]] + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] + // CHECK: br label {{%?}}[[TERMINATE:.+]] + // + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -93,19 +96,22 @@ int foo(int n) { { } - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l158}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l167}}_worker() + // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, + // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, + // CHECK: store i8* null, i8** [[OMP_WORK_FN]], + // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]], - // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0 + // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], + // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]] // // CHECK: [[SEL_WORKERS]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]] - // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]] + // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]], + // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0 // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]] // // CHECK: [[EXEC_PARALLEL]] @@ -121,35 +127,38 @@ int foo(int n) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l158]](i[[SZ:32|64]] [[ARG1:%[^)]+]]) + // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l167]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]]) // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], // CHECK: store i[[SZ]] [[ARG1]], i[[SZ]]* [[AA_ADDR]], // CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* - // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[A:%.+]] = sub i32 [[WS]], 1 - // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1 - // CHECK: [[MID:%.+]] = and i32 [[B]], - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]] - // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]] - // - // CHECK: [[CHECK_WORKER]] - // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]] - // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]] + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // // CHECK: [[WORKER]] - // CHECK: {{call|invoke}} void [[T3]]_worker() - 
// CHECK: br label {{%?}}[[EXIT]] + // CHECK: {{call|invoke}} void [[T2]]_worker() + // CHECK: br label {{%?}}[[EXIT:.+]] + // + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] // // CHECK: [[MASTER]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]]) + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK: load i16, i16* [[AA_CADDR]], - // CHECK: br label {{%?}}[[TERM:.+]] + // CHECK: br label {{%?}}[[TERMINATE:.+]] // - // CHECK: [[TERM]] - // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]], + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -160,19 +169,22 @@ int foo(int n) { aa += 1; } - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l261}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l276}}_worker() + // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, + // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, + // CHECK: store i8* null, i8** [[OMP_WORK_FN]], + // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]], - // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0 + // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], + // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]] // // CHECK: [[SEL_WORKERS]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]] - // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]] + // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]], + // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0 // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]] // // CHECK: [[EXEC_PARALLEL]] @@ -188,7 +200,7 @@ int foo(int n) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+foo.+l261]](i[[SZ]] + // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l276]](i[[SZ]] // Create local storage for each capture. 
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] // CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* @@ -219,26 +231,29 @@ int foo(int n) { // CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], // CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], // - // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[A:%.+]] = sub i32 [[WS]], 1 - // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1 - // CHECK: [[MID:%.+]] = and i32 [[B]], - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]] - // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]] - // - // CHECK: [[CHECK_WORKER]] - // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]] - // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]] + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // // CHECK: [[WORKER]] - // CHECK: {{call|invoke}} void [[T4]]_worker() - // CHECK: br label {{%?}}[[EXIT]] + // CHECK: {{call|invoke}} void [[T3]]_worker() + // CHECK: br label {{%?}}[[EXIT:.+]] + // + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] // // CHECK: [[MASTER]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]]) + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // // Use captures. 
// CHECK-64-DAG: load i32, i32* [[REF_A]] @@ -249,10 +264,10 @@ int foo(int n) { // CHECK-DAG: getelementptr inbounds double, double* [[REF_CN]], i[[SZ]] %{{.+}} // CHECK-DAG: getelementptr inbounds [[TT]], [[TT]]* [[REF_D]], i32 0, i32 0 // - // CHECK: br label {{%?}}[[TERM:.+]] + // CHECK: br label {{%?}}[[TERMINATE:.+]] // - // CHECK: [[TERM]] - // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]], + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -338,19 +353,22 @@ int bar(int n){ return a; } - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+l298}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+313}}_worker() + // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, + // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, + // CHECK: store i8* null, i8** [[OMP_WORK_FN]], + // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]], - // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0 + // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], + // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]] // // CHECK: [[SEL_WORKERS]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]] - // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]] + // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]], + // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0 // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]] // // CHECK: [[EXEC_PARALLEL]] @@ -366,7 +384,7 @@ int bar(int n){ // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+static.+l298]](i[[SZ]] + // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l313]](i[[SZ]] // Create local storage for each capture. 
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] // CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] @@ -382,36 +400,37 @@ int bar(int n){ // CHECK-DAG: [[REF_AAA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* // CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], // - // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[A:%.+]] = sub i32 [[WS]], 1 - // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1 - // CHECK: [[MID:%.+]] = and i32 [[B]], - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]] - // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]] - // - // CHECK: [[CHECK_WORKER]] - // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]] - // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]] + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // // CHECK: [[WORKER]] - // CHECK: {{call|invoke}} void [[T5]]_worker() - // CHECK: br label {{%?}}[[EXIT]] + // CHECK: {{call|invoke}} void [[T4]]_worker() + // CHECK: br label {{%?}}[[EXIT:.+]] // - // CHECK: [[MASTER]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]]) + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] // + // CHECK: [[MASTER]] + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK-64-DAG: load i32, i32* [[REF_A]] // CHECK-32-DAG: load i32, i32* [[LOCAL_A]] // CHECK-DAG: load i16, i16* [[REF_AA]] // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 + // CHECK: br label {{%?}}[[TERMINATE:.+]] // - // CHECK: br label {{%?}}[[TERM:.+]] - // - // CHECK: [[TERM]] - // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]], + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -420,19 +439,22 @@ int bar(int n){ - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l316}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l331}}_worker() + // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, + // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, + // CHECK: store i8* null, i8** [[OMP_WORK_FN]], + // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]], - // CHECK: [[SHOULD_EXIT:%.+]] = icmp 
eq i64 [[WORK]], 0 + // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], + // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]] // // CHECK: [[SEL_WORKERS]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]] - // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]] + // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]], + // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0 // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]] // // CHECK: [[EXEC_PARALLEL]] @@ -448,7 +470,7 @@ int bar(int n){ // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+S1.+l316]]( + // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l331]]( // Create local storage for each capture. // CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1:%struct.*]]* // CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] @@ -466,35 +488,39 @@ int bar(int n){ // CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], // CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], // CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], - // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[A:%.+]] = sub i32 [[WS]], 1 - // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1 - // CHECK: [[MID:%.+]] = and i32 [[B]], - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]] - // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]] - // - // CHECK: [[CHECK_WORKER]] - // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]] - // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]] + // + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // // CHECK: [[WORKER]] - // CHECK: {{call|invoke}} void [[T6]]_worker() - // CHECK: br label {{%?}}[[EXIT]] + // CHECK: {{call|invoke}} void [[T5]]_worker() + // CHECK: br label {{%?}}[[EXIT:.+]] + // + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] // // CHECK: [[MASTER]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]]) + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // Use captures. 
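 // (Each captured value below is then read back through the reference
 // materialized from its local alloca: the CHECK-64/CHECK-32 pairs encode
 // that 64-bit targets load scalars through the REF_* pointers recovered by
 // bitcast, while 32-bit targets read the LOCAL_* copy directly.)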
// CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0 // CHECK-64-DAG:load i32, i32* [[REF_B]] // CHECK-32-DAG:load i32, i32* [[LOCAL_B]] // CHECK-DAG: getelementptr inbounds i16, i16* [[REF_C]], i[[SZ]] %{{.+}} - // CHECK: br label {{%?}}[[TERM:.+]] + // CHECK: br label {{%?}}[[TERMINATE:.+]] // - // CHECK: [[TERM]] - // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]], + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -503,19 +529,22 @@ int bar(int n){ - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l281}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l296}}_worker() + // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, + // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, + // CHECK: store i8* null, i8** [[OMP_WORK_FN]], + // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]], - // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0 + // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], + // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null // CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]] // // CHECK: [[SEL_WORKERS]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]] - // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]] + // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]], + // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0 // CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]] // // CHECK: [[EXEC_PARALLEL]] @@ -531,7 +560,7 @@ int bar(int n){ // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T7:@__omp_offloading_.+template.+l281]](i[[SZ]] + // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l296]](i[[SZ]] // Create local storage for each capture. 
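 // (Scalar captures arrive as i[[SZ]] parameters and are spilled into these
 // allocas; typed references are then recovered by bitcast, e.g. the i[[SZ]]
 // slot for 'aa' is reinterpreted as i16*.)
 //
 // Taken together, the entry-point checks for these regions all match one
 // shape. As a rough pseudo-CUDA sketch of the matched IR (illustrative
 // names only; the exact master-thread id computation is elided by the
 // CHECK lines, which stop at "icmp eq i32 [[CMTID]],"):
 //
 //   int tid = tid_x(), nth = ntid_x(), ws = warpsize();
 //   if (tid < nth - ws) {              // lower threads become workers
 //     worker();                        // state machine checked above
 //   } else if (is_master(tid)) {       // master thread of the last warp
 //     __kmpc_kernel_init(nth - ws);    // announce the worker count
 //     /* run the target region sequentially, using the captures */
 //     __kmpc_kernel_deinit();
 //     barrier0();                      // synchronize before exit
 //   }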
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] // CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] @@ -544,36 +573,39 @@ int bar(int n){ // CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* // CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], // - // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[A:%.+]] = sub i32 [[WS]], 1 - // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1 - // CHECK: [[MID:%.+]] = and i32 [[B]], - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]] - // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]] - // - // CHECK: [[CHECK_WORKER]] - // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]] - // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]] + // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] + // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // // CHECK: [[WORKER]] - // CHECK: {{call|invoke}} void [[T7]]_worker() - // CHECK: br label {{%?}}[[EXIT]] + // CHECK: {{call|invoke}} void [[T6]]_worker() + // CHECK: br label {{%?}}[[EXIT:.+]] + // + // CHECK: [[CHECK_MASTER]] + // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], + // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]] // // CHECK: [[MASTER]] - // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]]) + // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() + // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // // CHECK-64-DAG: load i32, i32* [[REF_A]] // CHECK-32-DAG: load i32, i32* [[LOCAL_A]] // CHECK-DAG: load i16, i16* [[REF_AA]] // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 // - // CHECK: br label {{%?}}[[TERM:.+]] + // CHECK: br label {{%?}}[[TERMINATE:.+]] // - // CHECK: [[TERM]] - // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]], + // CHECK: [[TERMINATE]] + // CHECK: call void @__kmpc_kernel_deinit() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // diff --git a/test/OpenMP/target_codegen.cpp b/test/OpenMP/target_codegen.cpp index f263ebdd2fe3..b5e4b07cce04 100644 --- a/test/OpenMP/target_codegen.cpp +++ b/test/OpenMP/target_codegen.cpp @@ -22,11 +22,11 @@ // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } -// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } // CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } // CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } -// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, 
i{{32|64}} } +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } // We have 8 target regions, but only 7 that actually will generate offloading // code, only 6 will have mapped arguments, and only 4 have all-constant map diff --git a/test/OpenMP/target_codegen_registration.cpp b/test/OpenMP/target_codegen_registration.cpp index a440faff9158..f2721b77fec0 100644 --- a/test/OpenMP/target_codegen_registration.cpp +++ b/test/OpenMP/target_codegen_registration.cpp @@ -30,11 +30,11 @@ // CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } // CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } // CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } -// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } // CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } // CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } -// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } // CHECK-DAG: [[A1:@.+]] = internal global [[SA]] // CHECK-DAG: [[A2:@.+]] = global [[SA]] @@ -100,54 +100,54 @@ // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i // CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" -// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" -// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" -// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" -// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" -// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { 
i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" -// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" -// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" -// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" -// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" -// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" -// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section 
".omp_offloading.entries", align 1 // CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" -// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" -// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" -// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" -// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" -// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" -// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, 
i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" -// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" -// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" -// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" -// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" -// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" -// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* 
[[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" -// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 // CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] // CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] diff --git a/test/OpenMP/teams_distribute_collapse_messages.cpp b/test/OpenMP/teams_distribute_collapse_messages.cpp index 9ce58e0b0650..37c10e5986bf 100644 --- a/test/OpenMP/teams_distribute_collapse_messages.cpp +++ b/test/OpenMP/teams_distribute_collapse_messages.cpp @@ -66,7 +66,8 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; -#pragma omp distribute collapse (S) // expected-error {{'S' does not refer to a value}} +#pragma omp target +#pragma omp teams distribute collapse (S) // expected-error {{'S' does not refer to a value}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; diff --git a/test/Preprocessor/cuda-types.cu b/test/Preprocessor/cuda-types.cu index 2b6160b8d6c7..5f7b91655cdf 100644 --- a/test/Preprocessor/cuda-types.cu +++ b/test/Preprocessor/cuda-types.cu @@ -28,3 +28,19 @@ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ // RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-device-defines-filtered // RUN: diff %T/powerpc64-host-defines-filtered %T/powerpc64-device-defines-filtered + +// RUN: %clang --cuda-host-only -nocudainc -target i386-windows-msvc -x cuda -E -dM -o - /dev/null \ +// RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-msvc-host-defines-filtered +// RUN: %clang --cuda-device-only -nocudainc -nocudalib -target i386-windows-msvc -x cuda -E -dM -o - /dev/null \ +// RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-msvc-device-defines-filtered +// RUN: diff %T/i386-msvc-host-defines-filtered %T/i386-msvc-device-defines-filtered + +// RUN: %clang --cuda-host-only -nocudainc -target x86_64-windows-msvc -x cuda -E -dM -o - /dev/null \ +// RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-msvc-host-defines-filtered +// RUN: %clang --cuda-device-only -nocudainc -nocudalib -target x86_64-windows-msvc -x cuda -E -dM -o - /dev/null \ +// RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-msvc-device-defines-filtered +// RUN: diff %T/x86_64-msvc-host-defines-filtered %T/x86_64-msvc-device-defines-filtered diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c index b003404df6ff..8b8901931e7a 100644 --- a/test/Preprocessor/init.c +++ b/test/Preprocessor/init.c @@ -9189,3 +9189,174 @@ // RUN: %clang_cc1 -E -dM -ffreestanding -triple x86_64-windows-cygnus < /dev/null | FileCheck -match-full-lines 
-check-prefix CYGWIN-X64 %s // CYGWIN-X64: #define __USER_LABEL_PREFIX__ +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=avr \ +// RUN: < /dev/null \ +// RUN: | FileCheck -match-full-lines -check-prefix=AVR %s +// +// AVR:#define __ATOMIC_ACQUIRE 2 +// AVR:#define __ATOMIC_ACQ_REL 4 +// AVR:#define __ATOMIC_CONSUME 1 +// AVR:#define __ATOMIC_RELAXED 0 +// AVR:#define __ATOMIC_RELEASE 3 +// AVR:#define __ATOMIC_SEQ_CST 5 +// AVR:#define __AVR__ 1 +// AVR:#define __BIGGEST_ALIGNMENT__ 1 +// AVR:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +// AVR:#define __CHAR16_TYPE__ unsigned int +// AVR:#define __CHAR32_TYPE__ long unsigned int +// AVR:#define __CHAR_BIT__ 8 +// AVR:#define __DBL_DECIMAL_DIG__ 9 +// AVR:#define __DBL_DENORM_MIN__ 1.40129846e-45 +// AVR:#define __DBL_DIG__ 6 +// AVR:#define __DBL_EPSILON__ 1.19209290e-7 +// AVR:#define __DBL_HAS_DENORM__ 1 +// AVR:#define __DBL_HAS_INFINITY__ 1 +// AVR:#define __DBL_HAS_QUIET_NAN__ 1 +// AVR:#define __DBL_MANT_DIG__ 24 +// AVR:#define __DBL_MAX_10_EXP__ 38 +// AVR:#define __DBL_MAX_EXP__ 128 +// AVR:#define __DBL_MAX__ 3.40282347e+38 +// AVR:#define __DBL_MIN_10_EXP__ (-37) +// AVR:#define __DBL_MIN_EXP__ (-125) +// AVR:#define __DBL_MIN__ 1.17549435e-38 +// AVR:#define __FINITE_MATH_ONLY__ 0 +// AVR:#define __FLT_DECIMAL_DIG__ 9 +// AVR:#define __FLT_DENORM_MIN__ 1.40129846e-45F +// AVR:#define __FLT_DIG__ 6 +// AVR:#define __FLT_EPSILON__ 1.19209290e-7F +// AVR:#define __FLT_EVAL_METHOD__ 0 +// AVR:#define __FLT_HAS_DENORM__ 1 +// AVR:#define __FLT_HAS_INFINITY__ 1 +// AVR:#define __FLT_HAS_QUIET_NAN__ 1 +// AVR:#define __FLT_MANT_DIG__ 24 +// AVR:#define __FLT_MAX_10_EXP__ 38 +// AVR:#define __FLT_MAX_EXP__ 128 +// AVR:#define __FLT_MAX__ 3.40282347e+38F +// AVR:#define __FLT_MIN_10_EXP__ (-37) +// AVR:#define __FLT_MIN_EXP__ (-125) +// AVR:#define __FLT_MIN__ 1.17549435e-38F +// AVR:#define __FLT_RADIX__ 2 +// AVR:#define __GCC_ATOMIC_BOOL_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_CHAR_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_INT_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_LLONG_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_LONG_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_POINTER_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_SHORT_LOCK_FREE 1 +// AVR:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// AVR:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 1 +// AVR:#define __GXX_ABI_VERSION 1002 +// AVR:#define __INT16_C_SUFFIX__ +// AVR:#define __INT16_MAX__ 32767 +// AVR:#define __INT16_TYPE__ short +// AVR:#define __INT32_C_SUFFIX__ L +// AVR:#define __INT32_MAX__ 2147483647L +// AVR:#define __INT32_TYPE__ long int +// AVR:#define __INT64_C_SUFFIX__ LL +// AVR:#define __INT64_MAX__ 9223372036854775807LL +// AVR:#define __INT64_TYPE__ long long int +// AVR:#define __INT8_C_SUFFIX__ +// AVR:#define __INT8_MAX__ 127 +// AVR:#define __INT8_TYPE__ signed char +// AVR:#define __INTMAX_C_SUFFIX__ LL +// AVR:#define __INTMAX_MAX__ 9223372036854775807LL +// AVR:#define __INTMAX_TYPE__ long long int +// AVR:#define __INTPTR_MAX__ 32767 +// AVR:#define __INTPTR_TYPE__ int +// AVR:#define __INT_FAST16_MAX__ 32767 +// AVR:#define __INT_FAST16_TYPE__ int +// AVR:#define __INT_FAST32_MAX__ 2147483647L +// AVR:#define __INT_FAST32_TYPE__ long int +// AVR:#define __INT_FAST64_MAX__ 9223372036854775807LL +// AVR:#define __INT_FAST64_TYPE__ long long int +// AVR:#define __INT_FAST8_MAX__ 127 +// AVR:#define __INT_FAST8_TYPE__ signed char +// AVR:#define __INT_LEAST16_MAX__ 32767 
+// AVR:#define __INT_LEAST16_TYPE__ int +// AVR:#define __INT_LEAST32_MAX__ 2147483647L +// AVR:#define __INT_LEAST32_TYPE__ long int +// AVR:#define __INT_LEAST64_MAX__ 9223372036854775807LL +// AVR:#define __INT_LEAST64_TYPE__ long long int +// AVR:#define __INT_LEAST8_MAX__ 127 +// AVR:#define __INT_LEAST8_TYPE__ signed char +// AVR:#define __INT_MAX__ 32767 +// AVR:#define __LDBL_DECIMAL_DIG__ 9 +// AVR:#define __LDBL_DENORM_MIN__ 1.40129846e-45L +// AVR:#define __LDBL_DIG__ 6 +// AVR:#define __LDBL_EPSILON__ 1.19209290e-7L +// AVR:#define __LDBL_HAS_DENORM__ 1 +// AVR:#define __LDBL_HAS_INFINITY__ 1 +// AVR:#define __LDBL_HAS_QUIET_NAN__ 1 +// AVR:#define __LDBL_MANT_DIG__ 24 +// AVR:#define __LDBL_MAX_10_EXP__ 38 +// AVR:#define __LDBL_MAX_EXP__ 128 +// AVR:#define __LDBL_MAX__ 3.40282347e+38L +// AVR:#define __LDBL_MIN_10_EXP__ (-37) +// AVR:#define __LDBL_MIN_EXP__ (-125) +// AVR:#define __LDBL_MIN__ 1.17549435e-38L +// AVR:#define __LONG_LONG_MAX__ 9223372036854775807LL +// AVR:#define __LONG_MAX__ 2147483647L +// AVR:#define __NO_INLINE__ 1 +// AVR:#define __ORDER_BIG_ENDIAN__ 4321 +// AVR:#define __ORDER_LITTLE_ENDIAN__ 1234 +// AVR:#define __ORDER_PDP_ENDIAN__ 3412 +// AVR:#define __PRAGMA_REDEFINE_EXTNAME 1 +// AVR:#define __PTRDIFF_MAX__ 32767 +// AVR:#define __PTRDIFF_TYPE__ int +// AVR:#define __SCHAR_MAX__ 127 +// AVR:#define __SHRT_MAX__ 32767 +// AVR:#define __SIG_ATOMIC_MAX__ 127 +// AVR:#define __SIG_ATOMIC_WIDTH__ 8 +// AVR:#define __SIZEOF_DOUBLE__ 4 +// AVR:#define __SIZEOF_FLOAT__ 4 +// AVR:#define __SIZEOF_INT__ 2 +// AVR:#define __SIZEOF_LONG_DOUBLE__ 4 +// AVR:#define __SIZEOF_LONG_LONG__ 8 +// AVR:#define __SIZEOF_LONG__ 4 +// AVR:#define __SIZEOF_POINTER__ 2 +// AVR:#define __SIZEOF_PTRDIFF_T__ 2 +// AVR:#define __SIZEOF_SHORT__ 2 +// AVR:#define __SIZEOF_SIZE_T__ 2 +// AVR:#define __SIZEOF_WCHAR_T__ 2 +// AVR:#define __SIZEOF_WINT_T__ 2 +// AVR:#define __SIZE_MAX__ 65535U +// AVR:#define __SIZE_TYPE__ unsigned int +// AVR:#define __STDC__ 1 +// AVR:#define __UINT16_MAX__ 65535U +// AVR:#define __UINT16_TYPE__ unsigned short +// AVR:#define __UINT32_C_SUFFIX__ UL +// AVR:#define __UINT32_MAX__ 4294967295UL +// AVR:#define __UINT32_TYPE__ long unsigned int +// AVR:#define __UINT64_C_SUFFIX__ ULL +// AVR:#define __UINT64_MAX__ 18446744073709551615ULL +// AVR:#define __UINT64_TYPE__ long long unsigned int +// AVR:#define __UINT8_C_SUFFIX__ +// AVR:#define __UINT8_MAX__ 255 +// AVR:#define __UINT8_TYPE__ unsigned char +// AVR:#define __UINTMAX_C_SUFFIX__ ULL +// AVR:#define __UINTMAX_MAX__ 18446744073709551615ULL +// AVR:#define __UINTMAX_TYPE__ long long unsigned int +// AVR:#define __UINTPTR_MAX__ 65535U +// AVR:#define __UINTPTR_TYPE__ unsigned int +// AVR:#define __UINT_FAST16_MAX__ 65535U +// AVR:#define __UINT_FAST16_TYPE__ unsigned int +// AVR:#define __UINT_FAST32_MAX__ 4294967295UL +// AVR:#define __UINT_FAST32_TYPE__ long unsigned int +// AVR:#define __UINT_FAST64_MAX__ 18446744073709551615ULL +// AVR:#define __UINT_FAST64_TYPE__ long long unsigned int +// AVR:#define __UINT_FAST8_MAX__ 255 +// AVR:#define __UINT_FAST8_TYPE__ unsigned char +// AVR:#define __UINT_LEAST16_MAX__ 65535U +// AVR:#define __UINT_LEAST16_TYPE__ unsigned int +// AVR:#define __UINT_LEAST32_MAX__ 4294967295UL +// AVR:#define __UINT_LEAST32_TYPE__ long unsigned int +// AVR:#define __UINT_LEAST64_MAX__ 18446744073709551615ULL +// AVR:#define __UINT_LEAST64_TYPE__ long long unsigned int +// AVR:#define __UINT_LEAST8_MAX__ 255 +// AVR:#define __UINT_LEAST8_TYPE__ unsigned char +// 
AVR:#define __USER_LABEL_PREFIX__ +// AVR:#define __WCHAR_MAX__ 32767 +// AVR:#define __WCHAR_TYPE__ int +// AVR:#define __WINT_TYPE__ int diff --git a/test/Sema/warn-cast-align.c b/test/Sema/warn-cast-align.c index e8f85bc14d8d..389c0c17d2f7 100644 --- a/test/Sema/warn-cast-align.c +++ b/test/Sema/warn-cast-align.c @@ -59,3 +59,11 @@ void test4() { i = (int *)&s.s0; i = (int *)a; } + +// No warnings. +typedef int (*FnTy)(void); +unsigned int func5(void); + +FnTy test5(void) { + return (FnTy)&func5; +} diff --git a/test/Sema/warn-strict-prototypes.m b/test/Sema/warn-strict-prototypes.m index cbb01a1f7b21..4567dab01930 100644 --- a/test/Sema/warn-strict-prototypes.m +++ b/test/Sema/warn-strict-prototypes.m @@ -14,7 +14,8 @@ void foo() { void (^block)() = // expected-warning {{this function declaration is not a prototype}} ^void(int arg) { // no warning }; - void (^block2)(void) = // no warning - ^void() { // expected-warning {{this function declaration is not a prototype}} + void (^block2)(void) = ^void() { // no warning + }; + void (^block3)(void) = ^ { // no warning }; } diff --git a/test/Sema/warn-thread-safety-analysis.c b/test/Sema/warn-thread-safety-analysis.c index a0c4026b9136..425ce4c196a6 100644 --- a/test/Sema/warn-thread-safety-analysis.c +++ b/test/Sema/warn-thread-safety-analysis.c @@ -127,3 +127,7 @@ int main() { return 0; } + +// We had a problem where we'd skip all attributes that follow a late-parsed +// attribute in a single __attribute__. +void run() __attribute__((guarded_by(mu1), guarded_by(mu1))); // expected-warning 2{{only applies to fields and global variables}} diff --git a/test/SemaCUDA/attr-declspec.cu b/test/SemaCUDA/attr-declspec.cu new file mode 100644 index 000000000000..dda12ce8a51f --- /dev/null +++ b/test/SemaCUDA/attr-declspec.cu @@ -0,0 +1,34 @@ +// Test the __declspec spellings of CUDA attributes. +// +// RUN: %clang_cc1 -fsyntax-only -fms-extensions -verify %s +// RUN: %clang_cc1 -fsyntax-only -fms-extensions -fcuda-is-device -verify %s +// Now pretend that we're compiling a C file. There should be warnings. +// RUN: %clang_cc1 -DEXPECT_WARNINGS -fms-extensions -fsyntax-only -verify -x c %s + +#if defined(EXPECT_WARNINGS) +// expected-warning@+12 {{'__device__' attribute ignored}} +// expected-warning@+12 {{'__global__' attribute ignored}} +// expected-warning@+12 {{'__constant__' attribute ignored}} +// expected-warning@+12 {{'__shared__' attribute ignored}} +// expected-warning@+12 {{'__host__' attribute ignored}} +// +// (Currently we don't for the other attributes. They are implemented with +// IgnoredAttr, which is ignored irrespective of any LangOpts.) +#else +// expected-no-diagnostics +#endif + +__declspec(__device__) void f_device(); +__declspec(__global__) void f_global(); +__declspec(__constant__) int* g_constant; +__declspec(__shared__) float *g_shared; +__declspec(__host__) void f_host(); +__declspec(__device_builtin__) void f_device_builtin(); +typedef __declspec(__device_builtin__) const void *t_device_builtin; +enum __declspec(__device_builtin__) e_device_builtin {E}; +__declspec(__device_builtin__) int v_device_builtin; +__declspec(__cudart_builtin__) void f_cudart_builtin(); +__declspec(__device_builtin_surface_type__) unsigned long long surface_var; +__declspec(__device_builtin_texture_type__) unsigned long long texture_var; + +// Note that there's no __declspec spelling of nv_weak. 
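// For reference: with -fms-extensions each __declspec spelling above is
// accepted as an alternative spelling of the corresponding GNU-style CUDA
// attribute, so the following pair should behave identically (an
// illustrative equivalence, not part of this test):
//
//   __declspec(__device__)  void f_ms();
//   __attribute__((device)) void f_gnu();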
diff --git a/test/SemaCUDA/cuda-inherits-calling-conv.cu b/test/SemaCUDA/cuda-inherits-calling-conv.cu new file mode 100644 index 000000000000..67c438fa621b --- /dev/null +++ b/test/SemaCUDA/cuda-inherits-calling-conv.cu @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -std=c++11 -triple i386-windows-msvc \ +// RUN: -aux-triple nvptx-nvidia-cuda -fsyntax-only -verify %s + +// RUN: %clang_cc1 -std=c++11 -triple nvptx-nvidia-cuda \ +// RUN: -aux-triple i386-windows-msvc -fsyntax-only \ +// RUN: -fcuda-is-device -verify %s + +// RUN: %clang_cc1 -std=c++11 -triple nvptx-nvidia-cuda \ +// RUN: -aux-triple x86_64-linux-gnu -fsyntax-only \ +// RUN: -fcuda-is-device -verify -verify-ignore-unexpected=note \ +// RUN: -DEXPECT_ERR %s + +// CUDA device code should inherit the host's calling conventions. + +template +struct Foo; + +template +struct Foo {}; + +// On x86_64-linux-gnu, this is a redefinition of the template, because the +// __fastcall calling convention doesn't exist (and is therefore ignored). +#ifndef EXPECT_ERR +// expected-no-diagnostics +#else +// expected-error@+4 {{redefinition of 'Foo}} +// expected-warning@+3 {{calling convention '__fastcall' ignored}} +#endif +template +struct Foo {}; diff --git a/test/SemaCXX/constant-expression-cxx11.cpp b/test/SemaCXX/constant-expression-cxx11.cpp index 581a524339e7..884f2f30c42f 100644 --- a/test/SemaCXX/constant-expression-cxx11.cpp +++ b/test/SemaCXX/constant-expression-cxx11.cpp @@ -1725,7 +1725,7 @@ namespace AfterError { constexpr int error() { // expected-error {{no return statement}} return foobar; // expected-error {{undeclared identifier}} } - constexpr int k = error(); // expected-error {{must be initialized by a constant expression}} + constexpr int k = error(); } namespace std { @@ -2030,7 +2030,7 @@ namespace PR21786 { namespace PR21859 { constexpr int Fun() { return; } // expected-error {{non-void constexpr function 'Fun' should return a value}} - constexpr int Var = Fun(); // expected-error {{constexpr variable 'Var' must be initialized by a constant expression}} + constexpr int Var = Fun(); } struct InvalidRedef { diff --git a/test/SemaCXX/conversion-function.cpp b/test/SemaCXX/conversion-function.cpp index c725a0d5b7c1..531de818b680 100644 --- a/test/SemaCXX/conversion-function.cpp +++ b/test/SemaCXX/conversion-function.cpp @@ -440,7 +440,7 @@ namespace PR18234 { #endif } a; A::S s = a; // expected-error {{no viable conversion from 'struct A' to 'A::S'}} - A::E e = a; // expected-note {{here}} + A::E e = a; bool k1 = e == A::e; // expected-error {{no member named 'e'}} bool k2 = e.n == 0; } diff --git a/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp b/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp index 75c6734bce30..9b8fadd2f522 100644 --- a/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp +++ b/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp @@ -105,6 +105,7 @@ T deduce_ref(const std::initializer_list&); // expected-note {{conflicting ty template struct pair { pair(...); }; template void deduce_pairs(std::initializer_list>); +// expected-note@-1 {{deduced type 'pair<[...], typename WithIntType::type>' of element of 1st parameter does not match adjusted type 'pair<[...], float>' of element of argument [with T = WithIntType]}} struct WithIntType { typedef int type; }; template void deduce_after_init_list_in_pack(void (*)(T...), T...); // expected-note {{ vs. 
<(no value), double>}} @@ -123,7 +124,7 @@ void argument_deduction() { pair pi; pair pf; deduce_pairs({pi, pi, pi}); // ok - deduce_pairs({pi, pf, pi}); // FIXME: This should be rejected, as we fail to produce a type that exactly matches the argument type. + deduce_pairs({pi, pf, pi}); // expected-error {{no matching function}} deduce_after_init_list_in_pack((void(*)(int,int))0, {}, 0); deduce_after_init_list_in_pack((void(*)(int,int))0, {}, 0.0); // expected-error {{no matching function}} @@ -298,9 +299,18 @@ namespace TemporaryInitListSourceRange_PR22367 { namespace ParameterPackNestedInitializerLists_PR23904c3 { template - void f(std::initializer_list> ...tt); + void f(std::initializer_list> ...tt); // expected-note 2{{conflicting}} expected-note {{incomplete pack}} - void foo() { f({{0}}, {{'\0'}}); } + void foo() { + f({{0}}, {{'\0'}}); // ok, T = + f({{0}, {'\0'}}); // expected-error {{no match}} + f({{0, '\0'}}); // expected-error {{no match}} + + f({{0}}, {{{}}}); // expected-error {{no match}} + f({{0}}, {{{}, '\0'}}); // ok, T = + f({{0}, {{}}}); // ok, T = + f({{0, {}}}); // ok, T = + } } namespace update_rbrace_loc_crash { @@ -327,3 +337,13 @@ namespace update_rbrace_loc_crash { Explode([](int) {}); } } + +namespace no_conversion_after_auto_list_deduction { + // We used to deduce 'auto' == 'std::initializer_list' here, and then + // incorrectly accept the declaration of 'x'. + struct X { using T = std::initializer_list X::*; operator T(); }; + auto X::*x = { X() }; // expected-error {{from initializer list}} + + struct Y { using T = std::initializer_list(*)(); operator T(); }; + auto (*y)() = { Y() }; // expected-error {{from initializer list}} +} diff --git a/test/SemaCXX/cxx1z-decomposition.cpp b/test/SemaCXX/cxx1z-decomposition.cpp index 735a9e1dfee0..d457ace5d844 100644 --- a/test/SemaCXX/cxx1z-decomposition.cpp +++ b/test/SemaCXX/cxx1z-decomposition.cpp @@ -65,4 +65,9 @@ void for_range() { } } +int error_recovery() { + auto [foobar]; // expected-error {{requires an initializer}} + return foobar_; // expected-error {{undeclared identifier 'foobar_'}} +} + // FIXME: by-value array copies diff --git a/test/SemaCXX/default-arg-closures.cpp b/test/SemaCXX/default-arg-closures.cpp index e076cc05cd20..676bd486105f 100644 --- a/test/SemaCXX/default-arg-closures.cpp +++ b/test/SemaCXX/default-arg-closures.cpp @@ -4,16 +4,15 @@ // instantiating and checking the semantics of default arguments. Make sure we // do that right. -// FIXME: Don't diagnose this issue twice. 
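// (With this change the dependent default argument is instantiated, and
// hence diagnosed, only once: at the point where the dllexported
// default-ctor closure requires it. The expected-error count drops from 2
// to 1 and the instantiation notes move onto ExportDefaultCtorClosure.)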
template -struct DependentDefaultCtorArg { // expected-note {{in instantiation of default function argument}} - // expected-error@+1 2 {{type 'int' cannot be used prior to '::' because it has no members}} +struct DependentDefaultCtorArg { + // expected-error@+1 {{type 'int' cannot be used prior to '::' because it has no members}} DependentDefaultCtorArg(int n = T::error); }; struct __declspec(dllexport) // expected-note {{due to 'ExportDefaultCtorClosure' being dllexported}} -ExportDefaultCtorClosure // expected-note {{implicit default constructor for 'ExportDefaultCtorClosure' first required here}} -: DependentDefaultCtorArg // expected-note {{in instantiation of template class}} +ExportDefaultCtorClosure // expected-note {{in instantiation of default function argument expression for 'DependentDefaultCtorArg' required here}} expected-note {{implicit default constructor for 'ExportDefaultCtorClosure' first required here}} +: DependentDefaultCtorArg {}; template diff --git a/test/SemaCXX/dllexport.cpp b/test/SemaCXX/dllexport.cpp index b4850fc03d9b..a3fed70ec958 100644 --- a/test/SemaCXX/dllexport.cpp +++ b/test/SemaCXX/dllexport.cpp @@ -741,6 +741,27 @@ struct __declspec(dllexport) ClassWithMultipleDefaultCtors { ClassWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} ClassWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}} }; +template +struct ClassTemplateWithMultipleDefaultCtors { + __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} + __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}} +}; + +template struct HasDefaults { + HasDefaults(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} +}; +template struct __declspec(dllexport) HasDefaults; + +template struct +__declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults' required here}} +HasDefaults; // expected-note {{in instantiation of member function 'HasDefaults::HasDefaults' requested here}} + +template struct HasDefaults2 { + __declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults2' required here}} + HasDefaults2(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} +}; +template struct HasDefaults2; // expected-note {{in instantiation of member function 'HasDefaults2::HasDefaults2' requested here}} + #endif //===----------------------------------------------------------------------===// diff --git a/test/SemaCXX/type-definition-in-specifier.cpp b/test/SemaCXX/type-definition-in-specifier.cpp index 74ba058b4f12..2da649fdb0b8 100644 --- a/test/SemaCXX/type-definition-in-specifier.cpp +++ b/test/SemaCXX/type-definition-in-specifier.cpp @@ -59,10 +59,8 @@ struct s19018b { }; struct pr18963 { - short bar5 (struct foo4 {} bar2); // expected-error{{'foo4' cannot be defined in a parameter type}} \ - // expected-note{{declared here}} - - long foo5 (float foo6 = foo4); // expected-error{{'foo4' does not refer to a value}} + short bar5 (struct foo4 {} bar2); // expected-error{{'foo4' cannot be defined in a parameter type}} + long foo5 (float foo6 = foo4); }; // expected-error@+2 {{cannot be defined in a parameter type}} diff --git a/test/SemaObjC/block-omitted-return-type.m 
b/test/SemaObjC/block-omitted-return-type.m index 20e32e01865e..93d5e05ea282 100644 --- a/test/SemaObjC/block-omitted-return-type.m +++ b/test/SemaObjC/block-omitted-return-type.m @@ -24,7 +24,7 @@ return; }; void (^simpleBlock5)() = ^ const void { //expected-error {{incompatible block pointer types initializing 'void (^)()' with an expression of type 'const void (^)(void)'}} - return; + return; // expected-warning@-1 {{function cannot return qualified void type 'const void'}} }; void (^simpleBlock6)() = ^ const (void) { //expected-warning {{'const' qualifier on omitted return type '' has no effect}} return; diff --git a/test/SemaOpenCL/extensions.cl b/test/SemaOpenCL/extensions.cl index c27f3397cd79..6afb11e42a6a 100644 --- a/test/SemaOpenCL/extensions.cl +++ b/test/SemaOpenCL/extensions.cl @@ -22,6 +22,17 @@ // RUN: %clang_cc1 %s -triple spir-unknown-unknown -verify -pedantic -fsyntax-only -cl-ext=-all -cl-ext=+cl_khr_fp64 -cl-ext=+cl_khr_fp16 -cl-ext=-cl_khr_fp64 -DNOFP64 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -verify -pedantic -fsyntax-only -cl-ext=-all -cl-ext=+cl_khr_fp64,-cl_khr_fp64,+cl_khr_fp16 -DNOFP64 +// Test with -finclude-default-header, which includes opencl-c.h. opencl-c.h +// disables all extensions by default, but supported core extensions for a +// particular OpenCL version must be re-enabled (for example, cl_khr_fp64 is +// enabled by default with -cl-std=CL2.0). +// +// RUN: %clang_cc1 %s -triple amdgcn-unknown-unknown -verify -pedantic -fsyntax-only -cl-std=CL2.0 -finclude-default-header + +#ifdef _OPENCL_H_ +// expected-no-diagnostics +#endif + #ifdef FP64 // expected-no-diagnostics #endif @@ -33,6 +44,7 @@ void f1(double da) { // expected-error {{type 'double' requires cl_khr_fp64 exte } #endif +#ifndef _OPENCL_H_ int isnan(float x) { return __builtin_isnan(x); } @@ -40,6 +52,7 @@ int isnan(float x) { int isfinite(float x) { return __builtin_isfinite(x); } +#endif #pragma OPENCL EXTENSION cl_khr_fp64 : enable #ifdef NOFP64 diff --git a/test/SemaTemplate/deduction.cpp b/test/SemaTemplate/deduction.cpp index 5695cab9a27e..2275a8b3b7ad 100644 --- a/test/SemaTemplate/deduction.cpp +++ b/test/SemaTemplate/deduction.cpp @@ -407,3 +407,38 @@ namespace overload_vs_pack { void test() { j(x, f, x); } } } + +namespace b29946541 { + template class A {}; + template class C> + void f(C); // expected-note {{failed template argument deduction}} + void g(A a) { f(a); } // expected-error {{no match}} +} + +namespace deduction_from_empty_list { + template void f(int (&&)[N], int (&&)[N]) { // expected-note {{1 vs. 
2}} + static_assert(M == N, ""); + } + + void test() { + f<5>({}, {}); + f<1>({}, {0}); + f<1>({0}, {}); + f<1>({0}, {0}); + f<1>({0}, {0, 1}); // expected-error {{no matching}} + } +} + +namespace check_extended_pack { + template struct X { typedef int type; }; + template void f(typename X::type...); + template void f(T, int, int); + void g() { + f(0, 0, 0); + } + + template struct Y {}; + template void g(Y); // expected-note {{deduced non-type template argument does not have the same type as the corresponding template parameter ('int *' vs 'int')}} + int n; + void h() { g<0>(Y<0, &n>()); } // expected-error {{no matching function}} +} diff --git a/test/SemaTemplate/instantiate-local-class.cpp b/test/SemaTemplate/instantiate-local-class.cpp index a61af7a5af38..eaff4c4bbc8d 100644 --- a/test/SemaTemplate/instantiate-local-class.cpp +++ b/test/SemaTemplate/instantiate-local-class.cpp @@ -475,3 +475,14 @@ namespace rdar23721638 { } template void bar(); // expected-note {{in instantiation}} } + +namespace anon_union_default_member_init { + template void f() { + struct S { + union { + int i = 0; + }; + }; + } + void g() { f(); } +} diff --git a/tools/c-index-test/core_main.cpp b/tools/c-index-test/core_main.cpp index 3e4052c93ef5..8976d9134916 100644 --- a/tools/c-index-test/core_main.cpp +++ b/tools/c-index-test/core_main.cpp @@ -140,8 +140,7 @@ static bool printSourceSymbols(ArrayRef Args) { ArgsWithProgName.append(Args.begin(), Args.end()); IntrusiveRefCntPtr Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions)); - IntrusiveRefCntPtr - CInvok(createInvocationFromCommandLine(ArgsWithProgName, Diags)); + auto CInvok = createInvocationFromCommandLine(ArgsWithProgName, Diags); if (!CInvok) return true; @@ -153,7 +152,7 @@ static bool printSourceSymbols(ArrayRef Args) { auto PCHContainerOps = std::make_shared(); std::unique_ptr Unit(ASTUnit::LoadFromCompilerInvocationAction( - CInvok.get(), PCHContainerOps, Diags, IndexAction.get())); + std::move(CInvok), PCHContainerOps, Diags, IndexAction.get())); if (!Unit) return true; diff --git a/tools/clang-import-test/clang-import-test.cpp b/tools/clang-import-test/clang-import-test.cpp index 47598fc91813..33190af4bf45 100644 --- a/tools/clang-import-test/clang-import-test.cpp +++ b/tools/clang-import-test/clang-import-test.cpp @@ -157,7 +157,7 @@ BuildCompilerInstance(ArrayRef ClangArgv) { Inv->getCodeGenOpts().setDebugInfo(codegenoptions::FullDebugInfo); Inv->getTargetOpts().Triple = llvm::sys::getDefaultTargetTriple(); - Ins->setInvocation(Inv.release()); + Ins->setInvocation(std::move(Inv)); TargetInfo *TI = TargetInfo::CreateTargetInfo( Ins->getDiagnostics(), Ins->getInvocation().TargetOpts); diff --git a/tools/diagtool/ShowEnabledWarnings.cpp b/tools/diagtool/ShowEnabledWarnings.cpp index abbd3afbd58c..e6ea786a9ade 100644 --- a/tools/diagtool/ShowEnabledWarnings.cpp +++ b/tools/diagtool/ShowEnabledWarnings.cpp @@ -67,8 +67,8 @@ createDiagnostics(unsigned int argc, char **argv) { SmallVector Args; Args.push_back("diagtool"); Args.append(argv, argv + argc); - std::unique_ptr Invocation( - createInvocationFromCommandLine(Args, InterimDiags)); + std::unique_ptr Invocation = + createInvocationFromCommandLine(Args, InterimDiags); if (!Invocation) return nullptr; diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 40eea39f3bdb..9cdb2ee8d697 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -68,13 +68,14 @@ using namespace clang::cxcursor; using namespace clang::cxtu; using namespace clang::cxindex; 
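// MakeCXTranslationUnit now takes its ASTUnit by std::unique_ptr and
// releases it into the C-style CXTranslationUnitImpl, making the ownership
// hand-off explicit at the libclang boundary. Call sites follow this
// pattern (a sketch mirroring the hunks below):
//
//   std::unique_ptr<ASTUnit> AU = /* ...parse... */;
//   *out_TU = MakeCXTranslationUnit(CXXIdx, std::move(AU));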
-CXTranslationUnit cxtu::MakeCXTranslationUnit(CIndexer *CIdx, ASTUnit *AU) { +CXTranslationUnit cxtu::MakeCXTranslationUnit(CIndexer *CIdx, + std::unique_ptr AU) { if (!AU) return nullptr; assert(CIdx); CXTranslationUnit D = new CXTranslationUnitImpl(); D->CIdx = CIdx; - D->TheASTUnit = AU; + D->TheASTUnit = AU.release(); D->StringPool = new cxstring::CXStringPool(); D->Diagnostics = nullptr; D->OverridenCursorsPool = createOverridenCXCursorsPool(); @@ -3231,7 +3232,7 @@ enum CXErrorCode clang_createTranslationUnit2(CXIndex CIdx, /*CaptureDiagnostics=*/true, /*AllowPCHWithCompilerErrors=*/true, /*UserFilesAreVolatile=*/true); - *out_TU = MakeCXTranslationUnit(CXXIdx, AU.release()); + *out_TU = MakeCXTranslationUnit(CXXIdx, std::move(AU)); return *out_TU ? CXError_Success : CXError_Failure; } @@ -3383,7 +3384,7 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename, if (isASTReadError(Unit ? Unit.get() : ErrUnit.get())) return CXError_ASTReadError; - *out_TU = MakeCXTranslationUnit(CXXIdx, Unit.release()); + *out_TU = MakeCXTranslationUnit(CXXIdx, std::move(Unit)); return *out_TU ? CXError_Success : CXError_Failure; } diff --git a/tools/libclang/CIndexCodeCompletion.cpp b/tools/libclang/CIndexCodeCompletion.cpp index 12895c4a9b7a..ca68bc1cd28e 100644 --- a/tools/libclang/CIndexCodeCompletion.cpp +++ b/tools/libclang/CIndexCodeCompletion.cpp @@ -279,13 +279,12 @@ struct AllocatedCXCodeCompleteResults : public CXCodeCompleteResults { SmallVector TemporaryBuffers; /// \brief Allocator used to store globally cached code-completion results. - IntrusiveRefCntPtr - CachedCompletionAllocator; - + std::shared_ptr + CachedCompletionAllocator; + /// \brief Allocator used to store code completion results. - IntrusiveRefCntPtr - CodeCompletionAllocator; - + std::shared_ptr CodeCompletionAllocator; + /// \brief Context under which completion occurred. enum clang::CodeCompletionContext::Kind ContextKind; @@ -315,15 +314,15 @@ struct AllocatedCXCodeCompleteResults : public CXCodeCompleteResults { /// /// Used for debugging purposes only. 
static std::atomic CodeCompletionResultObjects; - + AllocatedCXCodeCompleteResults::AllocatedCXCodeCompleteResults( IntrusiveRefCntPtr FileMgr) - : CXCodeCompleteResults(), - DiagOpts(new DiagnosticOptions), + : CXCodeCompleteResults(), DiagOpts(new DiagnosticOptions), Diag(new DiagnosticsEngine( IntrusiveRefCntPtr(new DiagnosticIDs), &*DiagOpts)), FileMgr(FileMgr), SourceMgr(new SourceManager(*Diag, *FileMgr)), - CodeCompletionAllocator(new clang::GlobalCodeCompletionAllocator), + CodeCompletionAllocator( + std::make_shared()), Contexts(CXCompletionContext_Unknown), ContainerKind(CXCursor_InvalidCode), ContainerIsIncomplete(1) { if (getenv("LIBCLANG_OBJTRACKING")) diff --git a/tools/libclang/CXIndexDataConsumer.cpp b/tools/libclang/CXIndexDataConsumer.cpp index 45198dd1b168..1981cabbbe4c 100644 --- a/tools/libclang/CXIndexDataConsumer.cpp +++ b/tools/libclang/CXIndexDataConsumer.cpp @@ -410,8 +410,8 @@ void CXIndexDataConsumer::setASTContext(ASTContext &ctx) { cxtu::getASTUnit(CXTU)->setASTContext(&ctx); } -void CXIndexDataConsumer::setPreprocessor(Preprocessor &PP) { - cxtu::getASTUnit(CXTU)->setPreprocessor(&PP); +void CXIndexDataConsumer::setPreprocessor(std::shared_ptr PP) { + cxtu::getASTUnit(CXTU)->setPreprocessor(std::move(PP)); } bool CXIndexDataConsumer::isFunctionLocalDecl(const Decl *D) { diff --git a/tools/libclang/CXIndexDataConsumer.h b/tools/libclang/CXIndexDataConsumer.h index 406831f1ddce..718a2a18b1b3 100644 --- a/tools/libclang/CXIndexDataConsumer.h +++ b/tools/libclang/CXIndexDataConsumer.h @@ -342,7 +342,7 @@ public: CXTranslationUnit getCXTU() const { return CXTU; } void setASTContext(ASTContext &ctx); - void setPreprocessor(Preprocessor &PP); + void setPreprocessor(std::shared_ptr PP); bool shouldSuppressRefs() const { return IndexOptions & CXIndexOpt_SuppressRedundantRefs; diff --git a/tools/libclang/CXTranslationUnit.h b/tools/libclang/CXTranslationUnit.h index 6022c9dab1b5..67c31d2dba4f 100644 --- a/tools/libclang/CXTranslationUnit.h +++ b/tools/libclang/CXTranslationUnit.h @@ -38,7 +38,8 @@ struct CXTranslationUnitImpl { namespace clang { namespace cxtu { -CXTranslationUnitImpl *MakeCXTranslationUnit(CIndexer *CIdx, ASTUnit *AU); +CXTranslationUnitImpl *MakeCXTranslationUnit(CIndexer *CIdx, + std::unique_ptr AU); static inline ASTUnit *getASTUnit(CXTranslationUnit TU) { if (!TU) diff --git a/tools/libclang/Indexing.cpp b/tools/libclang/Indexing.cpp index c18b5402aa71..f98b25887973 100644 --- a/tools/libclang/Indexing.cpp +++ b/tools/libclang/Indexing.cpp @@ -371,7 +371,7 @@ public: DataConsumer->setASTContext(CI.getASTContext()); Preprocessor &PP = CI.getPreprocessor(); PP.addPPCallbacks(llvm::make_unique(PP, *DataConsumer)); - DataConsumer->setPreprocessor(PP); + DataConsumer->setPreprocessor(CI.getPreprocessorPtr()); if (SKData) { auto *PPRec = new PPConditionalDirectiveRecord(PP.getSourceManager()); @@ -476,17 +476,19 @@ static CXErrorCode clang_indexSourceFile_Impl( // present it will be unused. if (source_filename) Args->push_back(source_filename); - - IntrusiveRefCntPtr - CInvok(createInvocationFromCommandLine(*Args, Diags)); + + std::shared_ptr CInvok = + createInvocationFromCommandLine(*Args, Diags); if (!CInvok) return CXError_Failure; // Recover resources if we crash before exiting this function. 
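  // (CInvok is now a std::shared_ptr<CompilerInvocation>, so the registrar
  // below must name a destructor cleanup for the shared_ptr object itself,
  // taking &CInvok, instead of releasing an intrusive refcount obtained via
  // CInvok.get().)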
- llvm::CrashRecoveryContextCleanupRegistrar > - CInvokCleanup(CInvok.get()); + llvm::CrashRecoveryContextCleanupRegistrar< + std::shared_ptr, + llvm::CrashRecoveryContextDestructorCleanup< + std::shared_ptr>> + CInvokCleanup(&CInvok); if (CInvok->getFrontendOpts().Inputs.empty()) return CXError_Failure; @@ -518,13 +520,14 @@ static CXErrorCode clang_indexSourceFile_Impl( CInvok->getHeaderSearchOpts().ModuleFormat = CXXIdx->getPCHContainerOperations()->getRawReader().getFormat(); - ASTUnit *Unit = ASTUnit::create(CInvok.get(), Diags, CaptureDiagnostics, - /*UserFilesAreVolatile=*/true); + auto Unit = ASTUnit::create(CInvok, Diags, CaptureDiagnostics, + /*UserFilesAreVolatile=*/true); if (!Unit) return CXError_InvalidArguments; + auto *UPtr = Unit.get(); std::unique_ptr CXTU( - new CXTUOwner(MakeCXTranslationUnit(CXXIdx, Unit))); + new CXTUOwner(MakeCXTranslationUnit(CXXIdx, std::move(Unit)))); // Recover resources if we crash before exiting this method. llvm::CrashRecoveryContextCleanupRegistrar @@ -583,16 +586,16 @@ static CXErrorCode clang_indexSourceFile_Impl( !PrecompilePreamble ? 0 : 2 - CreatePreambleOnFirstParse; DiagnosticErrorTrap DiagTrap(*Diags); bool Success = ASTUnit::LoadFromCompilerInvocationAction( - CInvok.get(), CXXIdx->getPCHContainerOperations(), Diags, - IndexAction.get(), Unit, Persistent, CXXIdx->getClangResourcesPath(), + std::move(CInvok), CXXIdx->getPCHContainerOperations(), Diags, + IndexAction.get(), UPtr, Persistent, CXXIdx->getClangResourcesPath(), OnlyLocalDecls, CaptureDiagnostics, PrecompilePreambleAfterNParses, CacheCodeCompletionResults, /*IncludeBriefCommentsInCodeCompletion=*/false, /*UserFilesAreVolatile=*/true); if (DiagTrap.hasErrorOccurred() && CXXIdx->getDisplayDiagnostics()) - printDiagsToStderr(Unit); + printDiagsToStderr(UPtr); - if (isASTReadError(Unit)) + if (isASTReadError(UPtr)) return CXError_ASTReadError; if (!Success) diff --git a/unittests/AST/ExternalASTSourceTest.cpp b/unittests/AST/ExternalASTSourceTest.cpp index 4b3bb3e2b69b..513ff5b99fad 100644 --- a/unittests/AST/ExternalASTSourceTest.cpp +++ b/unittests/AST/ExternalASTSourceTest.cpp @@ -49,14 +49,14 @@ bool testExternalASTSource(ExternalASTSource *Source, CompilerInstance Compiler; Compiler.createDiagnostics(); - CompilerInvocation *Invocation = new CompilerInvocation; + auto Invocation = std::make_shared(); Invocation->getPreprocessorOpts().addRemappedFile( "test.cc", MemoryBuffer::getMemBuffer(FileContents).release()); const char *Args[] = { "test.cc" }; CompilerInvocation::CreateFromArgs(*Invocation, Args, Args + array_lengthof(Args), Compiler.getDiagnostics()); - Compiler.setInvocation(Invocation); + Compiler.setInvocation(std::move(Invocation)); TestFrontendAction Action(Source); return Compiler.ExecuteAction(Action); diff --git a/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index 67a4a3b2fc09..5957c7fa41da 100644 --- a/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -222,9 +222,12 @@ TEST(HasDeclaration, HasDeclarationOfEnumType) { } TEST(HasDeclaration, HasGetDeclTraitTest) { - EXPECT_TRUE(internal::has_getDecl::value); - EXPECT_TRUE(internal::has_getDecl::value); - EXPECT_FALSE(internal::has_getDecl::value); + static_assert(internal::has_getDecl::value, + "Expected TypedefType to have a getDecl."); + static_assert(internal::has_getDecl::value, + "Expected RecordType to have a getDecl."); + static_assert(!internal::has_getDecl::value, + "Expected 
TemplateSpecializationType to *not* have a getDecl."); } TEST(HasDeclaration, HasDeclarationOfTypeWithDecl) { diff --git a/unittests/Basic/SourceManagerTest.cpp b/unittests/Basic/SourceManagerTest.cpp index f41876147cdd..a967b0ec7c21 100644 --- a/unittests/Basic/SourceManagerTest.cpp +++ b/unittests/Basic/SourceManagerTest.cpp @@ -78,10 +78,10 @@ TEST_F(SourceManagerTest, isBeforeInTranslationUnit) { SourceMgr.setMainFileID(mainFileID); VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, LangOpts, - &*Target); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, SourceMgr, - HeaderInfo, ModLoader, + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, &*Target); + Preprocessor PP(std::make_shared(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); @@ -198,10 +198,10 @@ TEST_F(SourceManagerTest, getMacroArgExpandedLocation) { SourceMgr.overrideFileContents(headerFile, std::move(HeaderBuf)); VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, LangOpts, - &*Target); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, SourceMgr, - HeaderInfo, ModLoader, + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, &*Target); + Preprocessor PP(std::make_shared(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); @@ -298,10 +298,10 @@ TEST_F(SourceManagerTest, isBeforeInTranslationUnitWithMacroInInclude) { SourceMgr.overrideFileContents(headerFile, std::move(HeaderBuf)); VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, LangOpts, - &*Target); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, SourceMgr, - HeaderInfo, ModLoader, + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, &*Target); + Preprocessor PP(std::make_shared(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp index 90c99317bd79..59f4a4f6dcfe 100644 --- a/unittests/Format/FormatTestJS.cpp +++ b/unittests/Format/FormatTestJS.cpp @@ -541,8 +541,8 @@ TEST_F(FormatTestJS, FunctionLiterals) { " foo();\n" " bar();\n" " },\n" - " this, arg1IsReallyLongAndNeeedsLineBreaks,\n" - " arg3IsReallyLongAndNeeedsLineBreaks);"); + " this, arg1IsReallyLongAndNeedsLineBreaks,\n" + " arg3IsReallyLongAndNeedsLineBreaks);"); verifyFormat("var closure = goog.bind(function() { // comment\n" " foo();\n" " bar();\n" diff --git a/unittests/Frontend/CodeGenActionTest.cpp b/unittests/Frontend/CodeGenActionTest.cpp index 356b5130fcbe..1d2a50c8bc20 100644 --- a/unittests/Frontend/CodeGenActionTest.cpp +++ b/unittests/Frontend/CodeGenActionTest.cpp @@ -41,7 +41,7 @@ public: TEST(CodeGenTest, TestNullCodeGen) { - CompilerInvocation *Invocation = new CompilerInvocation; + auto Invocation = std::make_shared(); Invocation->getPreprocessorOpts().addRemappedFile( "test.cc", MemoryBuffer::getMemBuffer("").release()); @@ -50,7 +50,7 @@ TEST(CodeGenTest, TestNullCodeGen) { Invocation->getFrontendOpts().ProgramAction = EmitLLVM; Invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu"; CompilerInstance Compiler; - Compiler.setInvocation(Invocation); + Compiler.setInvocation(std::move(Invocation)); Compiler.createDiagnostics(); 
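The unit-test hunks above and below all track one ownership change in this import: CompilerInstance::setInvocation now takes std::shared_ptr<CompilerInvocation>, so each test replaces a bare new-plus-raw-pointer handoff with std::make_shared and a move. A minimal standalone sketch of that pattern, using simplified stand-in types rather than the real clang classes:

#include <memory>
#include <utility>

struct Invocation { bool CPlusPlus = false; }; // stand-in for CompilerInvocation

class Instance { // stand-in for CompilerInstance
  std::shared_ptr<Invocation> Inv;

public:
  // Taking the shared_ptr by value and moving it in makes the transfer of
  // ownership explicit at the call site, while still letting the instance
  // share the invocation with other components that outlive the caller.
  void setInvocation(std::shared_ptr<Invocation> I) { Inv = std::move(I); }
};

int main() {
  auto Inv = std::make_shared<Invocation>(); // replaces 'new CompilerInvocation'
  Inv->CPlusPlus = true;
  Instance C;
  C.setInvocation(std::move(Inv)); // Inv is empty from here on
  return 0;
}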
EXPECT_TRUE(Compiler.hasDiagnostics()); diff --git a/unittests/Frontend/FrontendActionTest.cpp b/unittests/Frontend/FrontendActionTest.cpp index c3e6adb6324d..dd6be5fd4b98 100644 --- a/unittests/Frontend/FrontendActionTest.cpp +++ b/unittests/Frontend/FrontendActionTest.cpp @@ -79,7 +79,7 @@ private: }; TEST(ASTFrontendAction, Sanity) { - CompilerInvocation *invocation = new CompilerInvocation; + auto invocation = std::make_shared(); invocation->getPreprocessorOpts().addRemappedFile( "test.cc", MemoryBuffer::getMemBuffer("int main() { float x; }").release()); @@ -88,7 +88,7 @@ TEST(ASTFrontendAction, Sanity) { invocation->getFrontendOpts().ProgramAction = frontend::ParseSyntaxOnly; invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu"; CompilerInstance compiler; - compiler.setInvocation(invocation); + compiler.setInvocation(std::move(invocation)); compiler.createDiagnostics(); TestASTFrontendAction test_action; @@ -99,7 +99,7 @@ TEST(ASTFrontendAction, Sanity) { } TEST(ASTFrontendAction, IncrementalParsing) { - CompilerInvocation *invocation = new CompilerInvocation; + auto invocation = std::make_shared(); invocation->getPreprocessorOpts().addRemappedFile( "test.cc", MemoryBuffer::getMemBuffer("int main() { float x; }").release()); @@ -108,7 +108,7 @@ TEST(ASTFrontendAction, IncrementalParsing) { invocation->getFrontendOpts().ProgramAction = frontend::ParseSyntaxOnly; invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu"; CompilerInstance compiler; - compiler.setInvocation(invocation); + compiler.setInvocation(std::move(invocation)); compiler.createDiagnostics(); TestASTFrontendAction test_action(/*enableIncrementalProcessing=*/true); @@ -119,7 +119,7 @@ TEST(ASTFrontendAction, IncrementalParsing) { } TEST(ASTFrontendAction, LateTemplateIncrementalParsing) { - CompilerInvocation *invocation = new CompilerInvocation; + auto invocation = std::make_shared(); invocation->getLangOpts()->CPlusPlus = true; invocation->getLangOpts()->DelayedTemplateParsing = true; invocation->getPreprocessorOpts().addRemappedFile( @@ -135,7 +135,7 @@ TEST(ASTFrontendAction, LateTemplateIncrementalParsing) { invocation->getFrontendOpts().ProgramAction = frontend::ParseSyntaxOnly; invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu"; CompilerInstance compiler; - compiler.setInvocation(invocation); + compiler.setInvocation(std::move(invocation)); compiler.createDiagnostics(); TestASTFrontendAction test_action(/*enableIncrementalProcessing=*/true, @@ -172,7 +172,7 @@ public: }; TEST(PreprocessorFrontendAction, EndSourceFile) { - CompilerInvocation *Invocation = new CompilerInvocation; + auto Invocation = std::make_shared(); Invocation->getPreprocessorOpts().addRemappedFile( "test.cc", MemoryBuffer::getMemBuffer("int main() { float x; }").release()); @@ -181,7 +181,7 @@ TEST(PreprocessorFrontendAction, EndSourceFile) { Invocation->getFrontendOpts().ProgramAction = frontend::ParseSyntaxOnly; Invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu"; CompilerInstance Compiler; - Compiler.setInvocation(Invocation); + Compiler.setInvocation(std::move(Invocation)); Compiler.createDiagnostics(); TestPPCallbacks *Callbacks = new TestPPCallbacks; @@ -231,7 +231,7 @@ struct TypoDiagnosticConsumer : public DiagnosticConsumer { }; TEST(ASTFrontendAction, ExternalSemaSource) { - auto *Invocation = new CompilerInvocation; + auto Invocation = std::make_shared(); Invocation->getLangOpts()->CPlusPlus = true; Invocation->getPreprocessorOpts().addRemappedFile( "test.cc", MemoryBuffer::getMemBuffer("void 
fooo();\n" @@ -242,7 +242,7 @@ TEST(ASTFrontendAction, ExternalSemaSource) { Invocation->getFrontendOpts().ProgramAction = frontend::ParseSyntaxOnly; Invocation->getTargetOpts().Triple = "i386-unknown-linux-gnu"; CompilerInstance Compiler; - Compiler.setInvocation(Invocation); + Compiler.setInvocation(std::move(Invocation)); auto *TDC = new TypoDiagnosticConsumer; Compiler.createDiagnostics(TDC, /*ShouldOwnClient=*/true); Compiler.setExternalSemaSource(new TypoExternalSemaSource(Compiler)); diff --git a/unittests/Lex/LexerTest.cpp b/unittests/Lex/LexerTest.cpp index 204601818152..918167bf43c5 100644 --- a/unittests/Lex/LexerTest.cpp +++ b/unittests/Lex/LexerTest.cpp @@ -64,10 +64,10 @@ protected: SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, LangOpts, - Target.get()); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, SourceMgr, - HeaderInfo, ModLoader, /*IILookup =*/nullptr, + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, Target.get()); + Preprocessor PP(std::make_shared(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); PP.EnterMainSourceFile(); diff --git a/unittests/Lex/PPCallbacksTest.cpp b/unittests/Lex/PPCallbacksTest.cpp index cbce5c6e1676..064abafc4a88 100644 --- a/unittests/Lex/PPCallbacksTest.cpp +++ b/unittests/Lex/PPCallbacksTest.cpp @@ -162,13 +162,12 @@ protected: VoidModuleLoader ModLoader; - IntrusiveRefCntPtr HSOpts = new HeaderSearchOptions(); - HeaderSearch HeaderInfo(HSOpts, SourceMgr, Diags, LangOpts, - Target.get()); + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, Target.get()); AddFakeHeader(HeaderInfo, HeaderPath, SystemHeader); - IntrusiveRefCntPtr PPOpts = new PreprocessorOptions(); - Preprocessor PP(PPOpts, Diags, LangOpts, SourceMgr, HeaderInfo, ModLoader, + Preprocessor PP(std::make_shared(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); @@ -199,11 +198,12 @@ protected: SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(SourceBuf))); VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, - OpenCLLangOpts, Target.get()); + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, OpenCLLangOpts, Target.get()); - Preprocessor PP(new PreprocessorOptions(), Diags, OpenCLLangOpts, SourceMgr, - HeaderInfo, ModLoader, /*IILookup =*/nullptr, + Preprocessor PP(std::make_shared(), Diags, + OpenCLLangOpts, SourceMgr, HeaderInfo, ModLoader, + /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); diff --git a/unittests/Lex/PPConditionalDirectiveRecordTest.cpp b/unittests/Lex/PPConditionalDirectiveRecordTest.cpp index bceeac57ea61..dccfffdb2c15 100644 --- a/unittests/Lex/PPConditionalDirectiveRecordTest.cpp +++ b/unittests/Lex/PPConditionalDirectiveRecordTest.cpp @@ -93,10 +93,10 @@ TEST_F(PPConditionalDirectiveRecordTest, PPRecAPI) { SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, LangOpts, - Target.get()); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, SourceMgr, - HeaderInfo, ModLoader, + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, Target.get()); + Preprocessor PP(std::make_shared(), Diags, 
LangOpts, + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); diff --git a/utils/TableGen/ClangAttrEmitter.cpp b/utils/TableGen/ClangAttrEmitter.cpp index d65794e86374..27ab34c1309d 100644 --- a/utils/TableGen/ClangAttrEmitter.cpp +++ b/utils/TableGen/ClangAttrEmitter.cpp @@ -133,10 +133,9 @@ static StringRef NormalizeNameForSpellingComparison(StringRef Name) { return Name.trim("_"); } -// Normalize attribute spelling only if the spelling has both leading -// and trailing underscores. For example, __ms_struct__ will be -// normalized to "ms_struct"; __cdecl will remain intact. -static StringRef NormalizeAttrSpelling(StringRef AttrSpelling) { +// Normalize the spelling of a GNU attribute (i.e. "x" in "__attribute__((x))"), +// removing "__" if it appears at the beginning and end of the attribute's name. +static StringRef NormalizeGNUAttrSpelling(StringRef AttrSpelling) { if (AttrSpelling.startswith("__") && AttrSpelling.endswith("__")) { AttrSpelling = AttrSpelling.substr(2, AttrSpelling.size() - 4); } @@ -3045,7 +3044,11 @@ void EmitClangAttrParsedAttrKinds(RecordKeeper &Records, raw_ostream &OS) { assert(Matches && "Unsupported spelling variety found"); - Spelling += NormalizeAttrSpelling(RawSpelling); + if (Variety == "GNU") + Spelling += NormalizeGNUAttrSpelling(RawSpelling); + else + Spelling += RawSpelling; + if (SemaHandler) Matches->push_back(StringMatcher::StringPair(Spelling, "return AttributeList::AT_" + AttrName + ";")); -- cgit v1.2.3 From 909545a822eef491158f831688066f0ec2866938 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 9 Jan 2017 21:23:09 +0000 Subject: Vendor import of llvm trunk r291476: https://llvm.org/svn/llvm-project/llvm/trunk@291476 --- cmake/config-ix.cmake | 13 +- cmake/modules/AddLLVM.cmake | 17 +- include/llvm/Analysis/ScalarEvolution.h | 2 + include/llvm/Analysis/TargetLibraryInfo.h | 4 +- include/llvm/CodeGen/MachineBasicBlock.h | 10 + include/llvm/CodeGen/MachineFrameInfo.h | 3 +- include/llvm/DebugInfo/MSF/StreamArray.h | 111 +- .../ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h | 4 +- include/llvm/ExecutionEngine/Orc/RPCUtils.h | 246 +- include/llvm/ExecutionEngine/Orc/RawByteChannel.h | 4 +- include/llvm/IR/ModuleSummaryIndexYAML.h | 12 +- include/llvm/IR/PassManager.h | 127 +- include/llvm/IR/User.h | 20 + include/llvm/Support/Path.h | 8 + include/llvm/Transforms/IPO.h | 13 +- include/llvm/Transforms/IPO/PassManagerBuilder.h | 1 - lib/Analysis/InstructionSimplify.cpp | 20 + lib/Analysis/LoopInfo.cpp | 6 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 42 +- lib/Analysis/ScalarEvolution.cpp | 12 + lib/Analysis/ValueTracking.cpp | 1 + lib/Bitcode/Reader/MetadataLoader.cpp | 13 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 4 +- lib/CodeGen/StackSlotColoring.cpp | 11 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 44 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 3 + lib/LTO/ThinLTOCodeGenerator.cpp | 9 +- lib/Object/MachOObjectFile.cpp | 8 + lib/Object/ModuleSummaryIndexObjectFile.cpp | 8 + lib/Support/CommandLine.cpp | 2 +- lib/Support/Path.cpp | 10 + lib/Support/TarWriter.cpp | 42 +- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 + lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 - lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 52 +- lib/Target/AMDGPU/R600ISelLowering.cpp | 281 +- lib/Target/AMDGPU/R600Instructions.td | 11 + lib/Target/AMDGPU/SIISelLowering.cpp | 39 +- lib/Target/AMDGPU/SIISelLowering.h | 3 +- lib/Target/AVR/AVRISelDAGToDAG.cpp | 4 +- 
lib/Target/AVR/AVRISelLowering.cpp | 41 + lib/Target/AVR/AVRISelLowering.h | 3 + lib/Target/BPF/BPFInstrInfo.cpp | 16 +- lib/Target/BPF/Disassembler/BPFDisassembler.cpp | 12 +- lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 20 +- lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 11 +- lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp | 19 +- lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 9 +- lib/Target/TargetMachineC.cpp | 4 +- lib/Target/WebAssembly/CMakeLists.txt | 1 + lib/Target/WebAssembly/WebAssembly.h | 1 + .../WebAssembly/WebAssemblyFixFunctionBitcasts.cpp | 159 + lib/Target/WebAssembly/WebAssemblyInstrInteger.td | 4 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 4 + lib/Target/X86/X86ISelLowering.cpp | 264 +- lib/Target/X86/X86InstrAVX512.td | 247 +- lib/Target/X86/X86InstrInfo.cpp | 19 +- lib/Target/X86/X86InstrSSE.td | 2 +- lib/Target/X86/X86TargetTransformInfo.cpp | 291 +- lib/Transforms/IPO/LowerTypeTests.cpp | 109 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 3 +- lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 +- .../Instrumentation/AddressSanitizer.cpp | 1 + lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- lib/Transforms/Scalar/LoopLoadElimination.cpp | 4 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 2 +- lib/Transforms/Scalar/NewGVN.cpp | 192 +- lib/Transforms/Scalar/SCCP.cpp | 18 - lib/Transforms/Utils/FunctionImportUtils.cpp | 15 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 12 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 34 +- test/Analysis/CostModel/X86/shuffle-reverse.ll | 2 +- test/Analysis/CostModel/X86/testshiftlshr.ll | 4 +- test/Analysis/CostModel/X86/testshiftshl.ll | 4 +- test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 45 +- test/Analysis/CostModel/X86/vshift-lshr-cost.ll | 66 +- test/Analysis/CostModel/X86/vshift-shl-cost.ll | 70 +- test/Analysis/ScalarEvolution/invalidation.ll | 70 + test/Analysis/ValueTracking/assume.ll | 22 +- test/Bindings/Go/lit.local.cfg | 2 +- test/Bindings/OCaml/lit.local.cfg | 2 +- test/CMakeLists.txt | 14 +- test/CodeGen/AMDGPU/load-constant-i16.ll | 138 +- test/CodeGen/AMDGPU/load-global-i16.ll | 331 +- test/CodeGen/AMDGPU/min.ll | 172 +- test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll | 16 + test/CodeGen/AMDGPU/store-private.ll | 743 +++ test/CodeGen/AVR/intrinsics/read_register.ll | 17 + test/CodeGen/WebAssembly/function-bitcasts.ll | 56 + .../WebAssembly/unsupported-function-bitcasts.ll | 26 + test/CodeGen/X86/avx2-arith.ll | 101 +- test/CodeGen/X86/avx512-bugfix-23634.ll | 2 +- test/CodeGen/X86/avx512-calling-conv.ll | 24 +- test/CodeGen/X86/avx512-cvt.ll | 14 +- test/CodeGen/X86/avx512-ext.ll | 33 +- test/CodeGen/X86/avx512-insert-extract.ll | 56 +- test/CodeGen/X86/avx512-mask-op.ll | 110 +- test/CodeGen/X86/avx512-mov.ll | 16 +- test/CodeGen/X86/avx512-regcall-NoMask.ll | 30 +- test/CodeGen/X86/avx512-vbroadcast.ll | 3 +- test/CodeGen/X86/avx512-vec-cmp.ll | 141 +- test/CodeGen/X86/avx512bw-mov.ll | 4 +- test/CodeGen/X86/avx512bw-vec-cmp.ll | 36 +- test/CodeGen/X86/avx512bwvl-mov.ll | 8 +- test/CodeGen/X86/avx512bwvl-vec-cmp.ll | 72 +- test/CodeGen/X86/avx512vl-mov.ll | 32 +- test/CodeGen/X86/avx512vl-vec-cmp.ll | 144 +- test/CodeGen/X86/cmov.ll | 6 +- test/CodeGen/X86/fma-fneg-combine.ll | 12 +- test/CodeGen/X86/fmaddsub-combine.ll | 129 + test/CodeGen/X86/sse-fsignum.ll | 11 +- test/CodeGen/X86/vector-compare-results.ll | 6208 +++++++++++++++----- test/CodeGen/X86/vector-sext.ll | 45 +- test/CodeGen/X86/vector-shift-ashr-128.ll | 130 +- test/CodeGen/X86/vector-shift-ashr-256.ll | 234 +- 
test/CodeGen/X86/vector-shift-ashr-512.ll | 52 +- test/CodeGen/X86/vector-shift-lshr-128.ll | 94 +- test/CodeGen/X86/vector-shift-lshr-256.ll | 162 +- test/CodeGen/X86/vector-shift-lshr-512.ll | 52 +- test/CodeGen/X86/vector-shift-shl-128.ll | 88 +- test/CodeGen/X86/vector-shift-shl-256.ll | 154 +- test/CodeGen/X86/vector-shift-shl-512.ll | 27 +- test/CodeGen/X86/vector-shuffle-512-v64.ll | 9 +- test/CodeGen/X86/vector-shuffle-masked.ll | 33 +- test/CodeGen/X86/vector-shuffle-v1.ll | 74 +- test/ExecutionEngine/Interpreter/lit.local.cfg | 2 +- .../RuntimeDyld/AArch64/ELF_ARM64_BE-relocations.s | 11 +- .../RuntimeDyld/AArch64/ELF_ARM64_local_branch.s | 14 + .../RuntimeDyld/AArch64/ELF_ARM64_relocations.s | 35 +- .../AddressSanitizer/global_metadata_darwin.ll | 2 +- test/JitListener/lit.local.cfg | 2 +- test/ThinLTO/X86/Inputs/funcimport-tbaa.ll | 11 + test/ThinLTO/X86/Inputs/local_name_conflict1.ll | 17 + test/ThinLTO/X86/Inputs/local_name_conflict2.ll | 17 + test/ThinLTO/X86/funcimport-tbaa.ll | 38 + test/ThinLTO/X86/local_name_conflict.ll | 29 + test/Transforms/GVN/invariant.group.ll | 52 + test/Transforms/InstCombine/assume.ll | 45 +- test/Transforms/InstCombine/assume2.ll | 141 +- test/Transforms/InstCombine/fabs.ll | 42 +- test/Transforms/InstCombine/fast-math.ll | 6 +- test/Transforms/InstCombine/urem-simplify-bug.ll | 52 +- test/Transforms/InstSimplify/div.ll | 15 + test/Transforms/InstSimplify/rem.ll | 14 + test/Transforms/LICM/hoisting.ll | 27 + test/Transforms/LoopLoadElim/forward.ll | 6 +- test/Transforms/LoopVectorize/iv_outside_user.ll | 45 + test/Transforms/NewGVN/basic-cyclic-opt.ll | 235 + test/Transforms/NewGVN/cyclic-phi-handling.ll | 37 + test/Transforms/NewGVN/invariant.group.ll | 52 + test/Transforms/NewGVN/memory-handling.ll | 195 + test/Transforms/NewGVN/pr31501.ll | 136 + test/Transforms/NewGVN/pr31573.ll | 42 + test/lit.cfg | 10 +- test/lit.site.cfg.in | 18 +- test/tools/llvm-config/system-libs.test | 3 +- test/tools/llvm-config/system-libs.windows.test | 3 +- test/tools/llvm-opt-report/Inputs/dm.c | 13 + test/tools/llvm-opt-report/Inputs/dm.yaml | 104 + test/tools/llvm-opt-report/func-dm.test | 17 + tools/llvm-config/llvm-config.cpp | 8 +- tools/llvm-objdump/MachODump.cpp | 31 +- tools/llvm-opt-report/OptReport.cpp | 14 +- unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp | 59 +- unittests/IR/UserTest.cpp | 25 + utils/unittest/CMakeLists.txt | 4 +- utils/update_test_checks.py | 51 +- 167 files changed, 10583 insertions(+), 4006 deletions(-) create mode 100644 lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp create mode 100644 test/Analysis/ScalarEvolution/invalidation.ll create mode 100644 test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll create mode 100644 test/CodeGen/AMDGPU/store-private.ll create mode 100644 test/CodeGen/AVR/intrinsics/read_register.ll create mode 100644 test/CodeGen/WebAssembly/function-bitcasts.ll create mode 100644 test/CodeGen/WebAssembly/unsupported-function-bitcasts.ll create mode 100644 test/CodeGen/X86/fmaddsub-combine.ll create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_local_branch.s create mode 100644 test/ThinLTO/X86/Inputs/funcimport-tbaa.ll create mode 100644 test/ThinLTO/X86/Inputs/local_name_conflict1.ll create mode 100644 test/ThinLTO/X86/Inputs/local_name_conflict2.ll create mode 100644 test/ThinLTO/X86/funcimport-tbaa.ll create mode 100644 test/ThinLTO/X86/local_name_conflict.ll create mode 100644 test/Transforms/InstSimplify/div.ll create mode 100644 test/Transforms/NewGVN/basic-cyclic-opt.ll create mode 
100644 test/Transforms/NewGVN/cyclic-phi-handling.ll create mode 100644 test/Transforms/NewGVN/memory-handling.ll create mode 100644 test/Transforms/NewGVN/pr31501.ll create mode 100644 test/Transforms/NewGVN/pr31573.ll create mode 100644 test/tools/llvm-opt-report/Inputs/dm.c create mode 100644 test/tools/llvm-opt-report/Inputs/dm.yaml create mode 100644 test/tools/llvm-opt-report/func-dm.test diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index d76f1293d02c..4288cf4bdd04 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -316,9 +316,9 @@ else() endif() endif() -check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) -check_cxx_compiler_flag("-Wno-gnu-zero-variadic-macro-arguments" - SUPPORTS_NO_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG) +check_cxx_compiler_flag("-Wvariadic-macros" SUPPORTS_VARIADIC_MACROS_FLAG) +check_cxx_compiler_flag("-Wgnu-zero-variadic-macro-arguments" + SUPPORTS_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG) set(USE_NO_MAYBE_UNINITIALIZED 0) set(USE_NO_UNINITIALIZED 0) @@ -462,13 +462,6 @@ if( MSVC ) if(LLVM_ENABLE_DIA_SDK AND NOT HAVE_DIA_SDK) message(FATAL_ERROR "DIA SDK not found. If you have both VS 2012 and 2013 installed, you may need to uninstall the former and re-install the latter afterwards.") endif() - - # Normalize to 0/1 for lit.site.cfg - if(LLVM_ENABLE_DIA_SDK) - set(LLVM_ENABLE_DIA_SDK 1) - else() - set(LLVM_ENABLE_DIA_SDK 0) - endif() else() set(LLVM_ENABLE_DIA_SDK 0) endif( MSVC ) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index fbef1d04eac4..56ba1479d7ee 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1011,11 +1011,11 @@ function(add_unittest test_suite test_name) list(APPEND LLVM_COMPILE_DEFINITIONS GTEST_HAS_PTHREAD=0) endif () - if (SUPPORTS_NO_VARIADIC_MACROS_FLAG) + if (SUPPORTS_VARIADIC_MACROS_FLAG) list(APPEND LLVM_COMPILE_FLAGS "-Wno-variadic-macros") endif () # Some parts of gtest rely on this GNU extension, don't warn on it. - if(SUPPORTS_NO_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG) + if(SUPPORTS_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG) list(APPEND LLVM_COMPILE_FLAGS "-Wno-gnu-zero-variadic-macro-arguments") endif() @@ -1067,6 +1067,19 @@ function(llvm_add_go_executable binary pkgpath) endif() endfunction() +# This function canonicalizes the CMake variables passed by name +# from CMake booleans to 0/1, suitable for passing into Python or C++, +# in place. +function(llvm_canonicalize_cmake_booleans) + foreach(var ${ARGN}) + if(${var}) + set(${var} 1 PARENT_SCOPE) + else() + set(${var} 0 PARENT_SCOPE) + endif() + endforeach() +endfunction(llvm_canonicalize_cmake_booleans) + # This function provides an automatic way to 'configure'-like generate a file # based on a set of common and custom variables, specifically targeting the # variables needed for the 'lit.site.cfg' files. This function bundles the diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 9dcffe1ac5fb..1a93f9aa5fd2 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -1491,6 +1491,8 @@ public: void print(raw_ostream &OS) const; void verify() const; + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); /// Collect parametric terms occurring in step expressions (first step of /// delinearization).
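The ScalarEvolution hunk just above declares the new-pass-manager invalidate hook: after a pass runs, each cached analysis result is asked whether it survives, given the set of analyses the pass preserved. A minimal standalone analog of that contract, with plain stand-in types in place of LLVM's PreservedAnalyses and Invalidator:

#include <iostream>
#include <set>
#include <string>

// Stand-in for llvm::PreservedAnalyses: the set of analyses that the pass
// which just ran reported as still valid.
struct PreservedSet {
  std::set<std::string> Names;
  bool preserved(const std::string &Name) const { return Names.count(Name) != 0; }
};

// Stand-in for a cached analysis result. Returning true from invalidate()
// means "drop me from the cache and recompute on the next query".
struct CachedResult {
  bool invalidate(const PreservedSet &PA) const {
    return !PA.preserved("ScalarEvolution");
  }
};

int main() {
  CachedResult R;
  PreservedSet PA; // the pass preserved nothing
  std::cout << R.invalidate(PA) << '\n'; // 1: must be recomputed
  PA.Names.insert("ScalarEvolution");
  std::cout << R.invalidate(PA) << '\n'; // 0: cached result stays valid
  return 0;
}

The real hook also receives an Invalidator argument so a result can declare itself stale when an analysis it depends on (for ScalarEvolution, e.g. the dominator tree or loop info) is itself invalidated.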
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 196fbc7faa8d..8675882431d5 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -290,7 +290,7 @@ public: } /// Returns extension attribute kind to be used for i32 parameters - /// correpsonding to C-level int or unsigned int. May be zeroext, signext, + /// corresponding to C-level int or unsigned int. May be zeroext, signext, /// or none. Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const { if (Impl->ShouldExtI32Param) @@ -301,7 +301,7 @@ public: } /// Returns extension attribute kind to be used for i32 return values - /// correpsonding to C-level int or unsigned int. May be zeroext, signext, + /// corresponding to C-level int or unsigned int. May be zeroext, signext, /// or none. Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const { if (Impl->ShouldExtI32Return) diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 92a9896d7a18..f3f5e324d76a 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -308,6 +308,16 @@ public: // Iteration support for live in sets. These sets are kept in sorted // order by their register number. typedef LiveInVector::const_iterator livein_iterator; +#ifndef NDEBUG + /// Unlike livein_begin, this method does not check that the liveness + /// information is accurate. Still for debug purposes it may be useful + /// to have iterators that won't assert if the liveness information + /// is not current. + livein_iterator livein_begin_dbg() const { return LiveIns.begin(); } + iterator_range liveins_dbg() const { + return make_range(livein_begin_dbg(), livein_end()); + } +#endif livein_iterator livein_begin() const; livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 2fab8137564e..4600c2c0f10c 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -148,8 +148,7 @@ class MachineFrameInfo { /// grouping overaligned allocas into a "secondary stack frame" and /// then only use a single alloca to allocate this frame and only a /// single virtual register to access it. Currently, without such an - /// optimization, each such alloca gets it's own dynamic - /// realignment. + /// optimization, each such alloca gets its own dynamic realignment. bool StackRealignable; /// Whether the function has the \c alignstack attribute. 
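The MachineBasicBlock hunk above adds livein_begin_dbg and wraps the begin/end pair with make_range, so debug code can iterate live-ins with a range-based for loop even when liveness information is not guaranteed current. A self-contained sketch of that iterator_range idiom (Block and the int register numbers are stand-ins; the range type mimics llvm::iterator_range):

#include <iostream>
#include <vector>

// Minimal analog of llvm::iterator_range: packages a begin/end pair so a
// member function can expose a whole range as one object.
template <typename It> struct iterator_range {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

template <typename It> iterator_range<It> make_range(It B, It E) {
  return {B, E};
}

struct Block {
  std::vector<int> LiveIns; // stand-in for the live-in register list
  using livein_iterator = std::vector<int>::const_iterator;

  // The "_dbg" accessor in the patch deliberately skips the up-to-date
  // liveness assertion that the regular accessor performs.
  livein_iterator livein_begin_dbg() const { return LiveIns.begin(); }
  livein_iterator livein_end() const { return LiveIns.end(); }
  iterator_range<livein_iterator> liveins_dbg() const {
    return make_range(livein_begin_dbg(), livein_end());
  }
};

int main() {
  Block B{{1, 2, 3}};
  for (int Reg : B.liveins_dbg()) // reads cleanly even with stale liveness
    std::cout << Reg << ' ';
  std::cout << '\n';
  return 0;
}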
diff --git a/include/llvm/DebugInfo/MSF/StreamArray.h b/include/llvm/DebugInfo/MSF/StreamArray.h index d8b74bc75c94..3bba80d807f3 100644 --- a/include/llvm/DebugInfo/MSF/StreamArray.h +++ b/include/llvm/DebugInfo/MSF/StreamArray.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_MSF_STREAMARRAY_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/DebugInfo/MSF/StreamRef.h" #include "llvm/Support/Error.h" #include @@ -107,7 +108,10 @@ private: Extractor E; }; -template class VarStreamArrayIterator { +template +class VarStreamArrayIterator + : public iterator_facade_base, + std::forward_iterator_tag, ValueType> { typedef VarStreamArrayIterator IterType; typedef VarStreamArray ArrayType; @@ -144,41 +148,39 @@ public: return false; } - bool operator!=(const IterType &R) { return !(*this == R); } - const ValueType &operator*() const { assert(Array && !HasError); return ThisValue; } - IterType &operator++() { - // We are done with the current record, discard it so that we are - // positioned at the next record. - IterRef = IterRef.drop_front(ThisLen); - if (IterRef.getLength() == 0) { - // There is nothing after the current record, we must make this an end - // iterator. - moveToEnd(); - } else { - // There is some data after the current record. - auto EC = Extract(IterRef, ThisLen, ThisValue); - if (EC) { - consumeError(std::move(EC)); - markError(); - } else if (ThisLen == 0) { - // An empty record? Make this an end iterator. + IterType &operator+=(std::ptrdiff_t N) { + while (N > 0) { + // We are done with the current record, discard it so that we are + // positioned at the next record. + IterRef = IterRef.drop_front(ThisLen); + if (IterRef.getLength() == 0) { + // There is nothing after the current record, we must make this an end + // iterator. moveToEnd(); + return *this; + } else { + // There is some data after the current record. + auto EC = Extract(IterRef, ThisLen, ThisValue); + if (EC) { + consumeError(std::move(EC)); + markError(); + return *this; + } else if (ThisLen == 0) { + // An empty record? Make this an end iterator. 
+ moveToEnd(); + return *this; + } } + --N; } return *this; } - IterType operator++(int) { - IterType Original = *this; - ++*this; - return Original; - } - private: void moveToEnd() { Array = nullptr; @@ -211,6 +213,16 @@ public: assert(Stream.getLength() % sizeof(T) == 0); } + bool operator==(const FixedStreamArray &Other) const { + return Stream == Other.Stream; + } + + bool operator!=(const FixedStreamArray &Other) const { + return !(*this == Other); + } + + FixedStreamArray &operator=(const FixedStreamArray &) = default; + const T &operator[](uint32_t Index) const { assert(Index < size()); uint32_t Off = Index * sizeof(T); @@ -226,6 +238,8 @@ public: uint32_t size() const { return Stream.getLength() / sizeof(T); } + bool empty() const { return size() == 0; } + FixedStreamArrayIterator begin() const { return FixedStreamArrayIterator(*this, 0); } @@ -240,36 +254,53 @@ private: ReadableStreamRef Stream; }; -template class FixedStreamArrayIterator { +template +class FixedStreamArrayIterator + : public iterator_facade_base, + std::random_access_iterator_tag, T> { + public: FixedStreamArrayIterator(const FixedStreamArray &Array, uint32_t Index) : Array(Array), Index(Index) {} - bool operator==(const FixedStreamArrayIterator &R) { - assert(&Array == &R.Array); - return Index == R.Index; + FixedStreamArrayIterator & + operator=(const FixedStreamArrayIterator &Other) { + Array = Other.Array; + Index = Other.Index; + return *this; } - bool operator!=(const FixedStreamArrayIterator &R) { - return !(*this == R); + const T &operator*() const { return Array[Index]; } + + bool operator==(const FixedStreamArrayIterator &R) const { + assert(Array == R.Array); + return (Index == R.Index) && (Array == R.Array); } - const T &operator*() const { return Array[Index]; } + FixedStreamArrayIterator &operator+=(std::ptrdiff_t N) { + Index += N; + return *this; + } - FixedStreamArrayIterator &operator++() { - assert(Index < Array.size()); - ++Index; + FixedStreamArrayIterator &operator-=(std::ptrdiff_t N) { + assert(Index >= N); + Index -= N; return *this; } - FixedStreamArrayIterator operator++(int) { - FixedStreamArrayIterator Original = *this; - ++*this; - return Original; + std::ptrdiff_t operator-(const FixedStreamArrayIterator &R) const { + assert(Array == R.Array); + assert(Index >= R.Index); + return Index - R.Index; + } + + bool operator<(const FixedStreamArrayIterator &RHS) const { + assert(Array == RHS.Array); + return Index < RHS.Index; } private: - const FixedStreamArray &Array; + FixedStreamArray Array; uint32_t Index; }; diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h index ab2b0fad89fd..3086ef0cdf80 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h +++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h @@ -83,7 +83,7 @@ public: namespace remote { class OrcRemoteTargetRPCAPI - : public rpc::SingleThreadedRPC { + : public rpc::SingleThreadedRPCEndpoint { protected: class ResourceIdMgr { public: @@ -108,7 +108,7 @@ protected: public: // FIXME: Remove constructors once MSVC supports synthesizing move-ops. 
OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C) - : rpc::SingleThreadedRPC(C, true) {} + : rpc::SingleThreadedRPCEndpoint(C, true) {} class CallIntVoid : public rpc::Function { diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h index f51fbe153a41..37e2e66e5af4 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h +++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -702,7 +702,7 @@ public: /// sync. template -class RPCBase { +class RPCEndpointBase { protected: class OrcRPCInvalid : public Function { public: @@ -747,7 +747,7 @@ protected: public: /// Construct an RPC instance on a channel. - RPCBase(ChannelT &C, bool LazyAutoNegotiation) + RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation) : C(C), LazyAutoNegotiation(LazyAutoNegotiation) { // Hold ResponseId in a special variable, since we expect Response to be // called relatively frequently, and want to avoid the map lookup. @@ -788,15 +788,21 @@ public: return FnIdOrErr.takeError(); } - // Allocate a sequence number. - auto SeqNo = SequenceNumberMgr.getSequenceNumber(); - assert(!PendingResponses.count(SeqNo) && - "Sequence number already allocated"); + SequenceNumberT SeqNo; // initialized in locked scope below. + { + // Lock the pending responses map and sequence number manager. + std::lock_guard Lock(ResponsesMutex); + + // Allocate a sequence number. + SeqNo = SequenceNumberMgr.getSequenceNumber(); + assert(!PendingResponses.count(SeqNo) && + "Sequence number already allocated"); - // Install the user handler. - PendingResponses[SeqNo] = + // Install the user handler. + PendingResponses[SeqNo] = detail::createResponseHandler( std::move(Handler)); + } // Open the function call message. if (auto Err = C.startSendMessage(FnId, SeqNo)) { @@ -863,11 +869,33 @@ public: return detail::ReadArgs(Args...); } + /// Abandon all outstanding result handlers. + /// + /// This will call all currently registered result handlers to receive an + /// "abandoned" error as their argument. This is used internally by the RPC + /// in error situations, but can also be called directly by clients who are + /// disconnecting from the remote and don't or can't expect responses to their + /// outstanding calls. (Especially for outstanding blocking calls, calling + /// this function may be necessary to avoid dead threads). + void abandonPendingResponses() { + // Lock the pending responses map and sequence number manager. + std::lock_guard Lock(ResponsesMutex); + + for (auto &KV : PendingResponses) + KV.second->abandon(); + PendingResponses.clear(); + SequenceNumberMgr.reset(); + } + protected: // The LaunchPolicy type allows a launch policy to be specified when adding // a function handler. See addHandlerImpl. using LaunchPolicy = std::function)>; + FunctionIdT getInvalidFunctionId() const { + return FnIdAllocator.getInvalidId(); + } + /// Add the given handler to the handler map and make it available for /// autonegotiation and execution. template @@ -884,28 +912,32 @@ protected: wrapHandler(std::move(Handler), std::move(Launch)); } - // Abandon all outstanding results. 
- void abandonPendingResponses() { - for (auto &KV : PendingResponses) - KV.second->abandon(); - PendingResponses.clear(); - SequenceNumberMgr.reset(); - } - Error handleResponse(SequenceNumberT SeqNo) { - auto I = PendingResponses.find(SeqNo); - if (I == PendingResponses.end()) { - abandonPendingResponses(); - return orcError(OrcErrorCode::UnexpectedRPCResponse); + using Handler = typename decltype(PendingResponses)::mapped_type; + Handler PRHandler; + + { + // Lock the pending responses map and sequence number manager. + std::unique_lock Lock(ResponsesMutex); + auto I = PendingResponses.find(SeqNo); + + if (I != PendingResponses.end()) { + PRHandler = std::move(I->second); + PendingResponses.erase(I); + SequenceNumberMgr.releaseSequenceNumber(SeqNo); + } else { + // Unlock the pending results map to prevent recursive lock. + Lock.unlock(); + abandonPendingResponses(); + return orcError(OrcErrorCode::UnexpectedRPCResponse); + } } - auto PRHandler = std::move(I->second); - PendingResponses.erase(I); - SequenceNumberMgr.releaseSequenceNumber(SeqNo); + assert(PRHandler && + "If we didn't find a response handler we should have bailed out"); if (auto Err = PRHandler->handleResponse(C)) { abandonPendingResponses(); - SequenceNumberMgr.reset(); return Err; } @@ -915,7 +947,7 @@ protected: FunctionIdT handleNegotiate(const std::string &Name) { auto I = LocalFunctionIds.find(Name); if (I == LocalFunctionIds.end()) - return FnIdAllocator.getInvalidId(); + return getInvalidFunctionId(); return I->second; } @@ -938,7 +970,7 @@ protected: // If autonegotiation indicates that the remote end doesn't support this // function, return an unknown function error. - if (RemoteId == FnIdAllocator.getInvalidId()) + if (RemoteId == getInvalidFunctionId()) return orcError(OrcErrorCode::UnknownRPCFunction); // Autonegotiation succeeded and returned a valid id. Update the map and @@ -1012,6 +1044,7 @@ protected: std::map Handlers; + std::mutex ResponsesMutex; detail::SequenceNumberManager SequenceNumberMgr; std::map>> PendingResponses; @@ -1021,17 +1054,18 @@ protected: template -class MultiThreadedRPC - : public detail::RPCBase< - MultiThreadedRPC, ChannelT, - FunctionIdT, SequenceNumberT> { +class MultiThreadedRPCEndpoint + : public detail::RPCEndpointBase< + MultiThreadedRPCEndpoint, + ChannelT, FunctionIdT, SequenceNumberT> { private: using BaseClass = - detail::RPCBase, - ChannelT, FunctionIdT, SequenceNumberT>; + detail::RPCEndpointBase< + MultiThreadedRPCEndpoint, + ChannelT, FunctionIdT, SequenceNumberT>; public: - MultiThreadedRPC(ChannelT &C, bool LazyAutoNegotiation) + MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation) : BaseClass(C, LazyAutoNegotiation) {} /// The LaunchPolicy type allows a launch policy to be specified when adding @@ -1061,30 +1095,41 @@ public: std::move(Launch)); } + /// Add a class-method as a handler. + template + void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...), + LaunchPolicy Launch = LaunchPolicy()) { + addHandler( + detail::MemberFnWrapper(Object, Method), + Launch); + } + /// Negotiate a function id for Func with the other end of the channel. - template Error negotiateFunction() { + template Error negotiateFunction(bool Retry = false) { using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate; + // Check if we already have a function id... + auto I = this->RemoteFunctionIds.find(Func::getPrototype()); + if (I != this->RemoteFunctionIds.end()) { + // If it's valid there's nothing left to do. 
+ if (I->second != this->getInvalidFunctionId()) + return Error::success(); + // If it's invalid and we can't re-attempt negotiation, throw an error. + if (!Retry) + return orcError(OrcErrorCode::UnknownRPCFunction); + } + + // We don't have a function id for Func yet, call the remote to try to + // negotiate one. if (auto RemoteIdOrErr = callB(Func::getPrototype())) { this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr; + if (*RemoteIdOrErr == this->getInvalidFunctionId()) + return orcError(OrcErrorCode::UnknownRPCFunction); return Error::success(); } else return RemoteIdOrErr.takeError(); } - /// Convenience method for negotiating multiple functions at once. - template Error negotiateFunctions() { - return negotiateFunction(); - } - - /// Convenience method for negotiating multiple functions at once. - template - Error negotiateFunctions() { - if (auto Err = negotiateFunction()) - return Err; - return negotiateFunctions(); - } - /// Return type for non-blocking call primitives. template using NonBlockingCallResult = typename detail::ResultTraits< @@ -1169,19 +1214,20 @@ public: template -class SingleThreadedRPC - : public detail::RPCBase< - SingleThreadedRPC, ChannelT, - FunctionIdT, SequenceNumberT> { +class SingleThreadedRPCEndpoint + : public detail::RPCEndpointBase< + SingleThreadedRPCEndpoint, + ChannelT, FunctionIdT, SequenceNumberT> { private: using BaseClass = - detail::RPCBase, - ChannelT, FunctionIdT, SequenceNumberT>; + detail::RPCEndpointBase< + SingleThreadedRPCEndpoint, + ChannelT, FunctionIdT, SequenceNumberT>; using LaunchPolicy = typename BaseClass::LaunchPolicy; public: - SingleThreadedRPC(ChannelT &C, bool LazyAutoNegotiation) + SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation) : BaseClass(C, LazyAutoNegotiation) {} template @@ -1197,29 +1243,31 @@ public: } /// Negotiate a function id for Func with the other end of the channel. - template Error negotiateFunction() { + template Error negotiateFunction(bool Retry = false) { using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate; + // Check if we already have a function id... + auto I = this->RemoteFunctionIds.find(Func::getPrototype()); + if (I != this->RemoteFunctionIds.end()) { + // If it's valid there's nothing left to do. + if (I->second != this->getInvalidFunctionId()) + return Error::success(); + // If it's invalid and we can't re-attempt negotiation, throw an error. + if (!Retry) + return orcError(OrcErrorCode::UnknownRPCFunction); + } + + // We don't have a function id for Func yet, call the remote to try to + // negotiate one. if (auto RemoteIdOrErr = callB(Func::getPrototype())) { this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr; + if (*RemoteIdOrErr == this->getInvalidFunctionId()) + return orcError(OrcErrorCode::UnknownRPCFunction); return Error::success(); } else return RemoteIdOrErr.takeError(); } - /// Convenience method for negotiating multiple functions at once. - template Error negotiateFunctions() { - return negotiateFunction(); - } - - /// Convenience method for negotiating multiple functions at once. - template - Error negotiateFunctions() { - if (auto Err = negotiateFunction()) - return Err; - return negotiateFunctions(); - } - template typename detail::ResultTraits::ErrorReturnType @@ -1332,6 +1380,68 @@ private: uint32_t NumOutstandingCalls; }; +/// @brief Convenience class for grouping RPC Functions into APIs that can be +/// negotiated as a block. +/// +template +class APICalls { +public: + + /// @brief Test whether this API contains Function F. 
+ template + class Contains { + public: + static const bool value = false; + }; + + /// @brief Negotiate all functions in this API. + template + static Error negotiate(RPCEndpoint &R) { + return Error::success(); + } +}; + +template +class APICalls { +public: + + template + class Contains { + public: + static const bool value = std::is_same::value | + APICalls::template Contains::value; + }; + + template + static Error negotiate(RPCEndpoint &R) { + if (auto Err = R.template negotiateFunction()) + return Err; + return APICalls::negotiate(R); + } + +}; + +template +class APICalls, Funcs...> { +public: + + template + class Contains { + public: + static const bool value = + APICalls::template Contains::value | + APICalls::template Contains::value; + }; + + template + static Error negotiate(RPCEndpoint &R) { + if (auto Err = APICalls::negotiate(R)) + return Err; + return APICalls::negotiate(R); + } + +}; + } // end namespace rpc } // end namespace orc } // end namespace llvm diff --git a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h index 83a7b9a844f2..3b6c84eb1965 100644 --- a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h +++ b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h @@ -48,9 +48,7 @@ public: template Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) { writeLock.lock(); - if (auto Err = serializeSeq(*this, FnId, SeqNo)) - return Err; - return Error::success(); + return serializeSeq(*this, FnId, SeqNo); } /// Notify the channel that we're ending a message send. diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index a8c8ff9ef2eb..aeb66633f2c8 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -28,14 +28,14 @@ template <> struct ScalarEnumerationTraits { template <> struct MappingTraits { static void mapping(IO &io, TypeTestResolution &res) { - io.mapRequired("Kind", res.TheKind); - io.mapRequired("SizeBitWidth", res.SizeBitWidth); + io.mapOptional("Kind", res.TheKind); + io.mapOptional("SizeBitWidth", res.SizeBitWidth); } }; template <> struct MappingTraits { static void mapping(IO &io, TypeIdSummary& summary) { - io.mapRequired("TTRes", summary.TTRes); + io.mapOptional("TTRes", summary.TTRes); } }; @@ -53,7 +53,7 @@ namespace yaml { template <> struct MappingTraits { static void mapping(IO &io, FunctionSummaryYaml& summary) { - io.mapRequired("TypeTests", summary.TypeTests); + io.mapOptional("TypeTests", summary.TypeTests); } }; @@ -100,8 +100,8 @@ template <> struct CustomMappingTraits { template <> struct MappingTraits { static void mapping(IO &io, ModuleSummaryIndex& index) { - io.mapRequired("GlobalValueMap", index.GlobalValueMap); - io.mapRequired("TypeIdMap", index.TypeIdMap); + io.mapOptional("GlobalValueMap", index.GlobalValueMap); + io.mapOptional("TypeIdMap", index.TypeIdMap); } }; diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 7a63956f1cdb..2e95f67a14a9 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -879,18 +879,22 @@ extern template class AnalysisManager; /// \brief Convenience typedef for the Function analysis manager. typedef AnalysisManager FunctionAnalysisManager; -/// \brief A module analysis which acts as a proxy for a function analysis -/// manager. +/// \brief An analysis over an "outer" IR unit that provides access to an +/// analysis manager over an "inner" IR unit. 
The inner unit must be contained +/// in the outer unit. /// -/// This primarily proxies invalidation information from the module analysis -/// manager and module pass manager to a function analysis manager. You should -/// never use a function analysis manager from within (transitively) a module -/// pass manager unless your parent module pass has received a proxy result -/// object for it. +/// For example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is +/// an analysis over Modules (the "outer" unit) that provides access to a +/// Function analysis manager. The FunctionAnalysisManager is the "inner" +/// manager being proxied, and Functions are the "inner" unit. The inner/outer +/// relationship is valid because each Function is contained in one Module. /// -/// Note that the proxy's result is a move-only object and represents ownership -/// of the validity of the analyses in the \c FunctionAnalysisManager it -/// provides. +/// If you're (transitively) within a pass manager for an IR unit U that +/// contains IR unit V, you should never use an analysis manager over V, except +/// via one of these proxies. +/// +/// Note that the proxy's result is a move-only RAII object. The validity of +/// the analyses in the inner analysis manager is tied to its lifetime. template class InnerAnalysisManagerProxy : public AnalysisInfoMixin< @@ -926,23 +930,16 @@ public: /// \brief Accessor for the analysis manager. AnalysisManagerT &getManager() { return *InnerAM; } - /// \brief Handler for invalidation of the outer IR unit. - /// - /// If this analysis itself is preserved, then we assume that the set of \c - /// IR units that the inner analysis manager controls hasn't changed and - /// thus we don't need to invalidate *all* cached data associated with any - /// \c IRUnitT* in the \c AnalysisManagerT. + /// \brief Handler for invalidation of the outer IR unit, \c IRUnitT. /// - /// Regardless of whether this analysis is marked as preserved, all of the - /// analyses in the \c AnalysisManagerT are potentially invalidated (for - /// the relevant inner set of their IR units) based on the set of preserved - /// analyses. + /// If the proxy analysis itself is not preserved, we assume that the set of + /// inner IR objects contained in IRUnit may have changed. In this case, + /// we have to call \c clear() on the inner analysis manager, as it may now + /// have stale pointers to its inner IR objects. /// - /// Because this needs to understand the mapping from one IR unit to an - /// inner IR unit, this method isn't defined in the primary template. - /// Instead, each specialization of this template will need to provide an - /// explicit specialization of this method to handle that particular pair - /// of IR unit and inner AnalysisManagerT. + /// Regardless of whether the proxy analysis is marked as preserved, all of + /// the analyses in the inner analysis manager are potentially invalidated + /// based on the set of preserved analyses. bool invalidate( IRUnitT &IR, const PreservedAnalyses &PA, typename AnalysisManager::Invalidator &Inv); @@ -956,13 +953,9 @@ public: /// \brief Run the analysis pass and create our proxy result object. /// - /// This doesn't do any interesting work, it is primarily used to insert our - /// proxy result object into the module analysis cache so that we can proxy - /// invalidation to the function analysis manager.
- /// - /// In debug builds, it will also assert that the analysis manager is empty - /// as no queries should arrive at the function analysis manager prior to - /// this analysis being requested. + /// This doesn't do any interesting work; it is primarily used to insert our + /// proxy result object into the outer analysis cache so that we can proxy + /// invalidation to the inner analysis manager. Result run(IRUnitT &IR, AnalysisManager &AM, ExtraArgTs...) { return Result(*InnerAM); } @@ -996,22 +989,24 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate( extern template class InnerAnalysisManagerProxy; -/// \brief A function analysis which acts as a proxy for a module analysis -/// manager. +/// \brief An analysis over an "inner" IR unit that provides access to an +/// analysis manager over an "outer" IR unit. The inner unit must be contained +/// in the outer unit. /// -/// This primarily provides an accessor to a parent module analysis manager to -/// function passes. Only the const interface of the module analysis manager is -/// provided to indicate that once inside of a function analysis pass you -/// cannot request a module analysis to actually run. Instead, the user must -/// rely on the \c getCachedResult API. +/// For example, OuterAnalysisManagerProxy<ModuleAnalysisManager, Function> is an +/// analysis over Functions (the "inner" unit) which provides access to a Module +/// analysis manager. The ModuleAnalysisManager is the "outer" manager being +/// proxied, and Modules are the "outer" IR unit. The inner/outer relationship +/// is valid because each Function is contained in one Module. /// -/// The invalidation provided by this proxy involves tracking when an -/// invalidation event in the outer analysis manager needs to trigger an -/// invalidation of a particular analysis on this IR unit. +/// This proxy only exposes the const interface of the outer analysis manager, +/// to indicate that you cannot cause an outer analysis to run from within an +/// inner pass. Instead, you must rely on the \c getCachedResult API. /// -/// Because outer analyses aren't invalidated while these IR units are being -/// precessed, we have to register and handle these as deferred invalidation -/// events. +/// This proxy doesn't manage invalidation in any way -- that is handled by the +/// recursive return path of each layer of the pass manager. A consequence of +/// this is that the outer analyses may be stale. We invalidate the outer +/// analyses only when we're done running passes over the inner IR units. template class OuterAnalysisManagerProxy : public AnalysisInfoMixin< @@ -1024,7 +1019,7 @@ public: const AnalysisManagerT &getManager() const { return *AM; } - /// \brief Handle invalidation by ignoring it, this pass is immutable. + /// \brief Handle invalidation by ignoring it; this pass is immutable. bool invalidate( IRUnitT &, const PreservedAnalyses &, typename AnalysisManager::Invalidator &) { @@ -1089,18 +1084,15 @@ AnalysisKey extern template class OuterAnalysisManagerProxy; -/// Provide the \c ModuleAnalysisManager to \c Fucntion proxy. +/// Provide the \c ModuleAnalysisManager to \c Function proxy. typedef OuterAnalysisManagerProxy ModuleAnalysisManagerFunctionProxy; /// \brief Trivial adaptor that maps from a module to its functions. /// /// Designed to allow composition of a FunctionPass(Manager) and -/// a ModulePassManager.
Note that if this pass is constructed with a pointer -/// to a \c ModuleAnalysisManager it will run the -/// \c FunctionAnalysisManagerModuleProxy analysis prior to running the function -/// pass over the module to enable a \c FunctionAnalysisManager to be used -/// within this run safely. +/// a ModulePassManager, by running the FunctionPass(Manager) over every +/// function in the module. /// /// Function passes run within this adaptor can rely on having exclusive access /// to the function they are run over. They should not read or modify any other @@ -1115,6 +1107,10 @@ typedef OuterAnalysisManagerProxy /// module. /// FIXME: Make the above true for all of LLVM's actual passes, some still /// violate this principle. +/// +/// Note that although function passes can access module analyses, module +/// analyses are not invalidated while the function passes are running, so they +/// may be stale. Function analyses will not be stale. template class ModuleToFunctionPassAdaptor : public PassInfoMixin> { @@ -1124,7 +1120,6 @@ public: /// \brief Runs the function pass across every function in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { - // Setup the function analysis manager from its proxy. FunctionAnalysisManager &FAM = AM.getResult(M).getManager(); @@ -1145,10 +1140,11 @@ public: PA.intersect(std::move(PassPA)); } - // By definition we preserve the proxy. We also preserve all analyses on - // Function units. This precludes *any* invalidation of function analyses - // by the proxy, but that's OK because we've taken care to invalidate - // analyses in the function analysis manager incrementally above. + // The FunctionAnalysisManagerModuleProxy is preserved because (we assume) + // the function passes we ran didn't add or remove any functions. + // + // We also preserve all analyses on Functions, because we did all the + // invalidation we needed to do above. PA.preserveSet>(); PA.preserve(); return PA; @@ -1166,7 +1162,7 @@ createModuleToFunctionPassAdaptor(FunctionPassT Pass) { return ModuleToFunctionPassAdaptor(std::move(Pass)); } -/// \brief A template utility pass to force an analysis result to be available. +/// \brief A utility pass template to force an analysis result to be available. /// /// If there are extra arguments at the pass's run level there may also be /// extra arguments to the analysis manager's \c getResult routine. We can't @@ -1196,17 +1192,14 @@ struct RequireAnalysisPass } }; -/// \brief A template utility pass to force an analysis result to be -/// invalidated. -/// -/// This is a no-op pass which simply forces a specific analysis result to be -/// invalidated when it is run. +/// \brief A no-op pass template which simply forces a specific analysis result +/// to be invalidated. template struct InvalidateAnalysisPass : PassInfoMixin> { /// \brief Run this pass over some unit of IR. /// - /// This pass can be run over any unit of IR and use any analysis manager + /// This pass can be run over any unit of IR and use any analysis manager, /// provided they satisfy the basic API requirements. When this pass is /// created, these methods can be instantiated to satisfy whatever the /// context requires. @@ -1218,10 +1211,10 @@ struct InvalidateAnalysisPass } }; -/// \brief A utility pass that does nothing but preserves no analyses. +/// \brief A utility pass that does nothing, but preserves no analyses. 
 ///
-/// As a consequence fo not preserving any analyses, this pass will force all
-/// analysis passes to be re-run to produce fresh results if any are needed.
+/// Because this preserves no analyses, any analysis passes queried after this
+/// pass runs will recompute fresh results.
 struct InvalidateAllAnalysesPass : PassInfoMixin<InvalidateAllAnalysesPass> {
   /// \brief Run this pass over some unit of IR.
   template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index e6fe97484580..c907d6b670b5 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -238,6 +238,26 @@ public:
     return make_range(value_op_begin(), value_op_end());
   }
 
+  struct const_value_op_iterator
+      : iterator_adaptor_base<const_value_op_iterator, const_op_iterator,
+                              std::random_access_iterator_tag, const Value *,
+                              ptrdiff_t, const Value *, const Value *> {
+    explicit const_value_op_iterator(const Use *U = nullptr) :
+      iterator_adaptor_base(U) {}
+    const Value *operator*() const { return *I; }
+    const Value *operator->() const { return operator*(); }
+  };
+
+  const_value_op_iterator value_op_begin() const {
+    return const_value_op_iterator(op_begin());
+  }
+  const_value_op_iterator value_op_end() const {
+    return const_value_op_iterator(op_end());
+  }
+  iterator_range<const_value_op_iterator> operand_values() const {
+    return make_range(value_op_begin(), value_op_end());
+  }
+
   /// \brief Drop all references to operands.
   ///
   /// This function is in charge of "letting go" of all objects that this User
diff --git a/include/llvm/Support/Path.h b/include/llvm/Support/Path.h
index 0513350d446b..2bbcef0c293f 100644
--- a/include/llvm/Support/Path.h
+++ b/include/llvm/Support/Path.h
@@ -207,6 +207,14 @@ void native(const Twine &path, SmallVectorImpl<char> &result);
 /// @param path A path that is transformed to native format.
 void native(SmallVectorImpl<char> &path);
 
+/// @brief Replaces backslashes with slashes if Windows.
+///
+/// @param path processed path
+/// @result The result of replacing backslashes with forward slashes if
+/// Windows. On Unix, this function is a no-op because backslashes are valid
+/// path characters.
+std::string convert_to_slash(StringRef path);
+
 /// @}
 /// @name Lexical Observers
 /// @{
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 4bebc863b4a9..dd55062e56f1 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -215,9 +215,20 @@ ModulePass *createMetaRenamerPass();
 /// manager.
 ModulePass *createBarrierNoopPass();
 
+/// What to do with the summary when running the LowerTypeTests pass.
+enum class LowerTypeTestsSummaryAction {
+  None,   ///< Do nothing.
+  Import, ///< Import typeid resolutions from summary and globals.
+  Export, ///< Export typeid resolutions to summary and globals.
+};
+
 /// \brief This pass lowers type metadata and the llvm.type.test intrinsic to
 /// bitsets.
-ModulePass *createLowerTypeTestsPass();
+/// \param Action What to do with the summary passed as Index.
+/// \param Index The summary to use for importing or exporting; this can be
+/// null when Action is None.
+ModulePass *createLowerTypeTestsPass(LowerTypeTestsSummaryAction Action,
+                                     ModuleSummaryIndex *Index);
 
 /// \brief This pass export CFI checks for use by external modules.
ModulePass *createCrossDSOCFIPass(); diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 9f9ce467337e..abfb24f0fe50 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -21,7 +21,6 @@ #include namespace llvm { -class ModuleSummaryIndex; class Pass; class TargetLibraryInfoImpl; class TargetMachine; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b4686a1ff175..8da2f0981d0c 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1106,6 +1106,16 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; + // udiv %V, C -> 0 if %V < C + if (MaxRecurse) { + if (Constant *C = dyn_cast_or_null(SimplifyICmpInst( + ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { + if (C->isAllOnesValue()) { + return Constant::getNullValue(Op0->getType()); + } + } + } + return nullptr; } @@ -1247,6 +1257,16 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; + // urem %V, C -> %V if %V < C + if (MaxRecurse) { + if (Constant *C = dyn_cast_or_null(SimplifyICmpInst( + ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) { + if (C->isAllOnesValue()) { + return Op0; + } + } + } + return nullptr; } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 19c0171740c9..3d85ef6988a9 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -179,9 +179,9 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { } bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const { - // For each block we check that it doesn't have any uses outside of it's - // innermost loop. This process will transitivelly guarntee that current loop - // and all of the nested loops are in the LCSSA form. + // For each block we check that it doesn't have any uses outside of its + // innermost loop. This process will transitively guarantee that the current + // loop and all of the nested loops are in LCSSA form. return all_of(this->blocks(), [&](const BasicBlock *BB) { return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT); }); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 2746361ab4b5..e7415e623196 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -344,38 +344,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, if (!InvariantGroupMD) return MemDepResult::getUnknown(); - Value *LoadOperand = LI->getPointerOperand(); + // Take the ptr operand after all casts and geps 0. This way we can search + // cast graph down only. + Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts(); + // It's is not safe to walk the use list of global value, because function // passes aren't allowed to look outside their functions. + // FIXME: this could be fixed by filtering instructions from outside + // of current function. if (isa(LoadOperand)) return MemDepResult::getUnknown(); // Queue to process all pointers that are equivalent to load operand. 
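  // Editorial aside (not part of this patch): the "equivalent" pointers
  // gathered below are addresses related by no-op casts, for example:
  //   %b = bitcast %T* %p to i8*                   ; same address as %p
  //   %g = getelementptr %T, %T* %p, i32 0, i32 0  ; also the same address
  // so an !invariant.group load through any of them can answer a query about
  // the others.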
SmallVector LoadOperandsQueue; - SmallSet SeenValues; - auto TryInsertToQueue = [&](Value *V) { - if (SeenValues.insert(V).second) - LoadOperandsQueue.push_back(V); - }; - - TryInsertToQueue(LoadOperand); + LoadOperandsQueue.push_back(LoadOperand); while (!LoadOperandsQueue.empty()) { const Value *Ptr = LoadOperandsQueue.pop_back_val(); - assert(Ptr); - if (isa(Ptr)) - continue; - - // Value comes from bitcast: Ptr = bitcast x. Insert x. - if (auto *BCI = dyn_cast(Ptr)) - TryInsertToQueue(BCI->getOperand(0)); - // Gep with zeros is equivalent to bitcast. - // FIXME: we are not sure if some bitcast should be canonicalized to gep 0 - // or gep 0 to bitcast because of SROA, so there are 2 forms. When typeless - // pointers will be upstream then both cases will be gone (and this BFS - // also won't be needed). - if (auto *GEP = dyn_cast(Ptr)) - if (GEP->hasAllZeroIndices()) - TryInsertToQueue(GEP->getOperand(0)); + assert(Ptr && !isa(Ptr) && + "Null or GlobalValue should not be inserted"); for (const Use &Us : Ptr->uses()) { auto *U = dyn_cast(Us.getUser()); @@ -385,13 +371,17 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // Bitcast or gep with zeros are using Ptr. Add to queue to check it's // users. U = bitcast Ptr if (isa(U)) { - TryInsertToQueue(U); + LoadOperandsQueue.push_back(U); continue; } - // U = getelementptr Ptr, 0, 0... + // Gep with zeros is equivalent to bitcast. + // FIXME: we are not sure if some bitcast should be canonicalized to gep 0 + // or gep 0 to bitcast because of SROA, so there are 2 forms. When + // typeless pointers will be ready then both cases will be gone + // (and this BFS also won't be needed). if (auto *GEP = dyn_cast(U)) if (GEP->hasAllZeroIndices()) { - TryInsertToQueue(U); + LoadOperandsQueue.push_back(U); continue; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 5e566bcdaff4..44f1a6dde0d2 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -10012,6 +10012,18 @@ void ScalarEvolution::verify() const { // TODO: Verify more things. } +bool ScalarEvolution::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // Invalidate the ScalarEvolution object whenever it isn't preserved or one + // of its dependencies is invalidated. + auto PAC = PA.getChecker(); + return !(PAC.preserved() || PAC.preservedSet>()) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA); +} + AnalysisKey ScalarEvolutionAnalysis::Key; ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 073b4e6ab26a..d31472c0d33c 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3257,6 +3257,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Intrinsic::dbg_value: return true; + case Intrinsic::bitreverse: case Intrinsic::bswap: case Intrinsic::ctlz: case Intrinsic::ctpop: diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index 460d39cc28d8..4a5d18e2db75 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -429,7 +429,7 @@ class MetadataLoader::MetadataLoaderImpl { /// Populate the index above to enable lazily loading of metadata, and load /// the named metadata as well as the transitively referenced global /// Metadata. 
- Expected lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders); + Expected lazyLoadModuleMetadataBlock(); /// On-demand loading of a single metadata. Requires the index above to be /// populated. @@ -516,8 +516,8 @@ Error error(const Twine &Message) { Message, make_error_code(BitcodeError::CorruptedBitcode)); } -Expected MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock( - PlaceholderQueue &Placeholders) { +Expected +MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { IndexCursor = Stream; SmallVector Record; // Get the abbrevs, and preload record positions to make them lazy-loadable. @@ -701,7 +701,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { // then load individual record as needed, starting with the named metadata. if (ModuleLevel && IsImporting && MetadataList.empty() && !DisableLazyLoading) { - auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders); + auto SuccessOrErr = lazyLoadModuleMetadataBlock(); if (!SuccessOrErr) return SuccessOrErr.takeError(); if (SuccessOrErr.get()) { @@ -1561,7 +1561,6 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment( return error("Invalid record"); SmallVector Record; - PlaceholderQueue Placeholders; while (true) { @@ -1608,10 +1607,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment( auto Idx = Record[i + 1]; if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) && - !MetadataList.lookup(Idx)) + !MetadataList.lookup(Idx)) { // Load the attachment if it is in the lazy-loadable range and hasn't // been loaded yet. lazyLoadOneMetadata(Idx, Placeholders); + resolveForwardRefsAndPlaceholders(Placeholders); + } Metadata *Node = MetadataList.getMetadataFwdRef(Idx); if (isa(Node)) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a37f4e1116b4..6b62f11f1240 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1714,7 +1714,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, EVT CCT = getSetCCResultType(NVT); // Hi part is always the same op - Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH}); + Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); // We need to know whether to select Lo part that corresponds to 'winning' // Hi part or if Hi parts are equal. 
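// Editorial worked example (not part of this patch). For a 64-bit smax split
// into 32-bit halves, the expansion above computes:
//   Hi = smax(LHSH, RHSH)
//   Lo = (LHSH == RHSH) ? umax(LHSL, RHSL) : (low half of the hi winner)
// The fix passes a single NVT result type to getNode: a {NVT, NVT} list would
// request a two-result node, and MIN/MAX nodes produce exactly one value.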
@@ -1725,7 +1725,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
   SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
 
   // Recursed Lo part if Hi parts are equal, this uses unsigned version
-  SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
+  SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
 
   Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
 }
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index bae828a2263c..234b2043a6a1 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
        I != E; ++I) {
     if (DCELimit != -1 && (int)NumDead >= DCELimit)
       break;
-    int FirstSS, SecondSS;
     if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
         FirstSS != -1) {
@@ -392,12 +391,18 @@
     }
 
     MachineBasicBlock::iterator NextMI = std::next(I);
-    if (NextMI == MBB->end()) continue;
+    MachineBasicBlock::iterator ProbableLoadMI = I;
 
     unsigned LoadReg = 0;
     unsigned StoreReg = 0;
     if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS))) continue;
+
+    // Skip the debug-value pseudo instructions between the load and the store.
+    while ((NextMI != E) && NextMI->isDebugValue()) {
+      ++NextMI;
+      ++I;
+    }
+    if (NextMI == E) continue;
     if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS))) continue;
     if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
 
@@ -407,7 +412,7 @@
 
     if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
       ++NumDead;
-      toErase.push_back(&*I);
+      toErase.push_back(&*ProbableLoadMI);
     }
 
     toErase.push_back(&*NextMI);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index a5a30fab5b69..8f6b1849169a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -896,6 +896,48 @@ uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType,
   return ELF::R_MIPS_NONE;
 }
 
+// Sometimes we don't need to create a thunk for a branch. This typically
+// happens when the branch target is located in the same object file. In that
+// case the target is either a weak symbol or a symbol in a different
+// executable section. This function checks whether the branch target is in
+// the same object file and whether the distance between source and target
+// fits the R_AARCH64_CALL26 relocation. If both conditions are met, it emits
+// a direct jump to the target and returns true. Otherwise false is returned
+// and a thunk is created.
+bool RuntimeDyldELF::resolveAArch64ShortBranch(
+    unsigned SectionID, relocation_iterator RelI,
+    const RelocationValueRef &Value) {
+  uint64_t Address;
+  if (Value.SymbolName) {
+    auto Loc = GlobalSymbolTable.find(Value.SymbolName);
+
+    // Don't create a direct branch for external symbols.
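    // Editorial note (not from the patch): an external symbol has no
    // GlobalSymbolTable entry yet, so its final address is unknown here and
    // the isInt<28> displacement test below could not be evaluated; only
    // locally defined targets can take the short-branch path.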
+ if (Loc == GlobalSymbolTable.end()) + return false; + + const auto &SymInfo = Loc->second; + Address = + uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( + SymInfo.getOffset())); + } else { + Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); + } + uint64_t Offset = RelI->getOffset(); + uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); + + // R_AARCH64_CALL26 requires immediate to be in range -2^27 <= imm < 2^27 + // If distance between source and target is out of range then we should + // create thunk. + if (!isInt<28>(Address + Value.Addend - SourceAddress)) + return false; + + resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), + Value.Addend); + + return true; +} + Expected RuntimeDyldELF::processRelocationRef( unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, @@ -1003,7 +1045,7 @@ RuntimeDyldELF::processRelocationRef( (uint64_t)Section.getAddressWithOffset(i->second), RelType, 0); DEBUG(dbgs() << " Stub function found\n"); - } else { + } else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); Stubs[Value] = Section.getStubOffset(); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 796127ab92bd..d1867d091fe2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -40,6 +40,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + bool resolveAArch64ShortBranch(unsigned SectionID, relocation_iterator RelI, + const RelocationValueRef &Value); + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 66ffe6db29d6..928f69a17de9 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -196,8 +196,15 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, }; FunctionImporter Importer(Index, Loader); - if (!Importer.importFunctions(TheModule, ImportList)) + Expected Result = Importer.importFunctions(TheModule, ImportList); + if (!Result) { + handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) { + SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(), + SourceMgr::DK_Error, EIB.message()); + Err.print("ThinLTO", errs()); + }); report_fatal_error("importFunctions failed"); + } } static void optimizeModule(Module &TheModule, TargetMachine &TM, diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 40105000c56c..5b018676eba3 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -2823,7 +2823,11 @@ StringRef MachORebaseEntry::typeName() const { } bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { +#ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif return (Ptr == Other.Ptr) && (RemainingLoopCount == Other.RemainingLoopCount) && (Done == Other.Done); @@ -3073,7 +3077,11 @@ uint32_t MachOBindEntry::flags() const { return Flags; } int MachOBindEntry::ordinal() const { return Ordinal; } bool 
MachOBindEntry::operator==(const MachOBindEntry &Other) const { +#ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif return (Ptr == Other.Ptr) && (RemainingLoopCount == Other.RemainingLoopCount) && (Done == Other.Done); diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp index 202783e7d993..11ace84b9ceb 100644 --- a/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ b/lib/Object/ModuleSummaryIndexObjectFile.cpp @@ -22,6 +22,12 @@ using namespace llvm; using namespace object; +static llvm::cl::opt IgnoreEmptyThinLTOIndexFile( + "ignore-empty-index-file", llvm::cl::ZeroOrMore, + llvm::cl::desc( + "Ignore an empty index file and perform non-ThinLTO compilation"), + llvm::cl::init(false)); + ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( MemoryBufferRef Object, std::unique_ptr I) : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { @@ -97,6 +103,8 @@ llvm::getModuleSummaryIndexForFile(StringRef Path) { if (EC) return errorCodeToError(EC); MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); + if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) + return nullptr; Expected> ObjOrErr = object::ModuleSummaryIndexObjectFile::create(BufferRef); if (!ObjOrErr) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 0a989706b436..3889902eea54 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -373,7 +373,7 @@ void Option::removeArgument() { GlobalParser->removeOption(this); } void Option::setArgStr(StringRef S) { if (FullyInitialized) GlobalParser->updateArgStr(this, S); - assert(S[0] != '-' && "Option can't start with '-"); + assert((S.empty() || S[0] != '-') && "Option can't start with '-"); ArgStr = S; } diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 0616d05aff57..4bb035eeccca 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -571,6 +571,16 @@ void native(SmallVectorImpl &Path) { #endif } +std::string convert_to_slash(StringRef path) { +#ifdef LLVM_ON_WIN32 + std::string s = path.str(); + std::replace(s.begin(), s.end(), '\\', '/'); + return s; +#else + return path; +#endif +} + StringRef filename(StringRef path) { return *rbegin(path); } diff --git a/lib/Support/TarWriter.cpp b/lib/Support/TarWriter.cpp index 5fc17d276377..f79b364dc1f7 100644 --- a/lib/Support/TarWriter.cpp +++ b/lib/Support/TarWriter.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" using namespace llvm; @@ -109,27 +110,44 @@ static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) { pad(OS); } +// In the Ustar header, a path can be split at any '/' to store +// a path into UstarHeader::Name and UstarHeader::Prefix. This +// function splits a given path for that purpose. +static std::pair splitPath(StringRef Path) { + if (Path.size() <= sizeof(UstarHeader::Name)) + return {"", Path}; + size_t Sep = Path.rfind('/', sizeof(UstarHeader::Name) + 1); + if (Sep == StringRef::npos) + return {"", Path}; + return {Path.substr(0, Sep), Path.substr(Sep + 1)}; +} + +// Returns true if a given path can be stored to a Ustar header +// without the PAX extension. 
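// Editorial example (not from the patch), assuming the standard 100-byte
// ustar Name field:
//   splitPath("lib/foo.o")                   -> {"", "lib/foo.o"}   (fits Name)
//   splitPath(<120-char path ".../objs/f.o">) -> {".../objs", "f.o"}
// fitsInUstar() below then only needs to check that the Name half fits.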
+static bool fitsInUstar(StringRef Path) { + StringRef Prefix; + StringRef Name; + std::tie(Prefix, Name) = splitPath(Path); + return Name.size() <= sizeof(UstarHeader::Name); +} + // The PAX header is an extended format, so a PAX header needs // to be followed by a "real" header. static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) { + StringRef Prefix; + StringRef Name; + std::tie(Prefix, Name) = splitPath(Path); + UstarHeader Hdr = {}; - memcpy(Hdr.Name, Path.data(), Path.size()); + memcpy(Hdr.Name, Name.data(), Name.size()); memcpy(Hdr.Mode, "0000664", 8); snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size); memcpy(Hdr.Magic, "ustar", 6); + memcpy(Hdr.Prefix, Prefix.data(), Prefix.size()); computeChecksum(Hdr); OS << StringRef(reinterpret_cast(&Hdr), sizeof(Hdr)); } -// We want to use '/' as a path separator even on Windows. -// This function canonicalizes a given path. -static std::string canonicalize(std::string S) { -#ifdef LLVM_ON_WIN32 - std::replace(S.begin(), S.end(), '\\', '/'); -#endif - return S; -} - // Creates a TarWriter instance and returns it. Expected> TarWriter::create(StringRef OutputPath, StringRef BaseDir) { @@ -145,8 +163,8 @@ TarWriter::TarWriter(int FD, StringRef BaseDir) // Append a given file to an archive. void TarWriter::append(StringRef Path, StringRef Data) { // Write Path and Data. - std::string S = BaseDir + "/" + canonicalize(Path) + "\0"; - if (S.size() <= sizeof(UstarHeader::Name)) { + std::string S = BaseDir + "/" + sys::path::convert_to_slash(Path) + "\0"; + if (fitsInUstar(S)) { writeUstarHeader(OS, S, Data.size()); } else { writePaxHeader(OS, S); diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ef3b44f7c211..2b4fc5397b18 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -608,6 +608,10 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, if ((C = dyn_cast(Addr))) { Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && + (C = dyn_cast(Addr.getOperand(0)))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && (C = dyn_cast(Addr.getOperand(1)))) { Base = Addr.getOperand(0); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 0b0a0e7d083e..730bcdcf7afa 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -172,16 +172,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v2f64, Promote); AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32); - setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); - setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); - - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); - setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); - - setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); - setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); - setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); - setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 
b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index a6c31629e7c4..da9d009c542b 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -822,6 +822,7 @@ public: bool isForcedVOP3() const { return ForcedEncodingSize == 64; } bool isForcedDPP() const { return ForcedDPP; } bool isForcedSDWA() const { return ForcedSDWA; } + ArrayRef getMatchedVariants() const; std::unique_ptr parseRegister(); bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; @@ -1630,31 +1631,44 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +// What asm variants we should check +ArrayRef AMDGPUAsmParser::getMatchedVariants() const { + if (getForcedEncodingSize() == 32) { + static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; + return makeArrayRef(Variants); + } + + if (isForcedVOP3()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; + return makeArrayRef(Variants); + } + + if (isForcedSDWA()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA}; + return makeArrayRef(Variants); + } + + if (isForcedDPP()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; + return makeArrayRef(Variants); + } + + static const unsigned Variants[] = { + AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP + }; + + return makeArrayRef(Variants); +} + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { - // What asm variants we should check - std::vector MatchedVariants; - if (getForcedEncodingSize() == 32) { - MatchedVariants = {AMDGPUAsmVariants::DEFAULT}; - } else if (isForcedVOP3()) { - MatchedVariants = {AMDGPUAsmVariants::VOP3}; - } else if (isForcedSDWA()) { - MatchedVariants = {AMDGPUAsmVariants::SDWA}; - } else if (isForcedDPP()) { - MatchedVariants = {AMDGPUAsmVariants::DPP}; - } else { - MatchedVariants = {AMDGPUAsmVariants::DEFAULT, - AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, - AMDGPUAsmVariants::DPP}; - } - MCInst Inst; unsigned Result = Match_Success; - for (auto Variant : MatchedVariants) { + for (auto Variant : getMatchedVariants()) { uint64_t EI; auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, Variant); @@ -3486,7 +3500,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); // Add the register arguments - if ((BasicInstType == SIInstrFlags::VOPC || + if ((BasicInstType == SIInstrFlags::VOPC || BasicInstType == SIInstrFlags::VOP2)&& Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 89c9266746ac..de7ce5cb9e47 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -99,6 +99,18 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::i32, MVT::i8, Custom); setTruncStoreAction(MVT::i32, MVT::i16, Custom); + // We need to include these since trunc STORES to PRIVATE need + // special handling to accommodate RMW + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom); + 
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
+  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
+  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
+  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
+  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
 
   // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
   setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
@@ -1087,79 +1099,114 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                    SelectionDAG &DAG) const {
   SDLoc DL(Store);
+  // TODO: Who creates the i8 stores?
+  assert(Store->isTruncatingStore()
+         || Store->getValue().getValueType() == MVT::i8);
+  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
 
-  unsigned Mask = 0;
+  SDValue Mask;
   if (Store->getMemoryVT() == MVT::i8) {
-    Mask = 0xff;
+    assert(Store->getAlignment() >= 1);
+    Mask = DAG.getConstant(0xff, DL, MVT::i32);
   } else if (Store->getMemoryVT() == MVT::i16) {
-    Mask = 0xffff;
+    assert(Store->getAlignment() >= 2);
+    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
+  } else {
+    llvm_unreachable("Unsupported private trunc store");
   }
 
   SDValue Chain = Store->getChain();
   SDValue BasePtr = Store->getBasePtr();
+  SDValue Offset = Store->getOffset();
   EVT MemVT = Store->getMemoryVT();
 
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
-                            DAG.getConstant(2, DL, MVT::i32));
-  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                            Chain, Ptr,
-                            DAG.getTargetConstant(0, DL, MVT::i32));
+  SDValue LoadPtr = BasePtr;
+  if (!Offset.isUndef()) {
+    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
+  }
+
+  // Get dword location
+  // TODO: this should be eliminated by the future SHR ptr, 2
+  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
+                            DAG.getConstant(0xfffffffc, DL, MVT::i32));
+
+  // Load dword
+  // TODO: can we be smarter about machine pointer info?
+  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
 
-  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
+  Chain = Dst.getValue(1);
+
+  // Get offset in dword
+  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                 DAG.getConstant(0x3, DL, MVT::i32));
 
+  // Convert byte offset to bit shift
   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                  DAG.getConstant(3, DL, MVT::i32));
 
+  // TODO: Contrary to the name of the function,
+  // it also handles sub i32 non-truncating stores (like i1)
   SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                   Store->getValue());
 
+  // Mask the value to the right type
   SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
 
+  // Shift the value in place
   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                      MaskedValue, ShiftAmt);
 
-  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
-                                DAG.getConstant(Mask, DL, MVT::i32),
-                                ShiftAmt);
-  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
-                        DAG.getConstant(0xffffffff, DL, MVT::i32));
+  // Shift the mask in place
+  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
+
+  // Invert the mask.
NOTE: if we had native ROL instructions we could + // use inverted mask + DstMask = DAG.getNOT(DL, DstMask, MVT::i32); + + // Cleanup the target bits Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask); + // Add the new bits SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue); - return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, - Chain, Value, Ptr, - DAG.getTargetConstant(0, DL, MVT::i32)); + + // Store dword + // TODO: Can we be smarter about MachinePointerInfo? + return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); } SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *StoreNode = cast(Op); unsigned AS = StoreNode->getAddressSpace(); + + SDValue Chain = StoreNode->getChain(); + SDValue Ptr = StoreNode->getBasePtr(); SDValue Value = StoreNode->getValue(); - EVT ValueVT = Value.getValueType(); + + EVT VT = Value.getValueType(); EVT MemVT = StoreNode->getMemoryVT(); - unsigned Align = StoreNode->getAlignment(); + EVT PtrVT = Ptr.getValueType(); + SDLoc DL(Op); + + // Neither LOCAL nor PRIVATE can do vectors at the moment if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && - ValueVT.isVector()) { - return SplitVectorStore(Op, DAG); + VT.isVector()) { + return scalarizeVectorStore(StoreNode, DAG); } - // Private AS needs special fixes - if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) && + unsigned Align = StoreNode->getAlignment(); + if (Align < MemVT.getStoreSize() && !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { return expandUnalignedStore(StoreNode, DAG); } - SDLoc DL(Op); - SDValue Chain = StoreNode->getChain(); - SDValue Ptr = StoreNode->getBasePtr(); + SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, + DAG.getConstant(2, DL, PtrVT)); if (AS == AMDGPUAS::GLOBAL_ADDRESS) { // It is beneficial to create MSKOR here instead of combiner to avoid // artificial dependencies introduced by RMW if (StoreNode->isTruncatingStore()) { - EVT VT = Value.getValueType(); assert(VT.bitsLE(MVT::i32)); SDValue MaskConstant; if (MemVT == MVT::i8) { @@ -1169,15 +1216,19 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(StoreNode->getAlignment() >= 2); MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32); } - SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr, - DAG.getConstant(2, DL, MVT::i32)); - SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(0x00000003, DL, VT)); + + SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr, + DAG.getConstant(0x00000003, DL, PtrVT)); + SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, + DAG.getConstant(3, DL, VT)); + + // Put the mask in correct place + SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); + + // Put the mask in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); - SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, - DAG.getConstant(3, DL, VT)); - SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift); - SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift); + SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); + // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32 // vector instead. 
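  // Editorial worked example (not from the patch): storing i8 0xAB at a byte
  // address with ByteIndex == 2 gives BitShift == 16, so
  //   Mask         = 0xFF << 16          = 0x00FF0000
  //   ShiftedValue = (0xAB & 0xFF) << 16 = 0x00AB0000
  // and MSKOR merges ShiftedValue into the destination dword under Mask.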
SDValue Src[4] = { @@ -1191,12 +1242,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, Op->getVTList(), Args, MemVT, StoreNode->getMemOperand()); - } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && - ValueVT.bitsGE(MVT::i32)) { + } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) { // Convert pointer from byte address to dword address. - Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), - DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), - Ptr, DAG.getConstant(2, DL, MVT::i32))); + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { llvm_unreachable("Truncated and indexed stores not supported yet"); @@ -1207,49 +1255,22 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } } + // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes if (AS != AMDGPUAS::PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) return lowerPrivateTruncStore(StoreNode, DAG); - // Lowering for indirect addressing - const MachineFunction &MF = DAG.getMachineFunction(); - const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); - unsigned StackWidth = TFL->getStackWidth(MF); - - Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); - - if (ValueVT.isVector()) { - unsigned NumElemVT = ValueVT.getVectorNumElements(); - EVT ElemVT = ValueVT.getVectorElementType(); - SmallVector Stores(NumElemVT); - - assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " - "vector width in load"); - - for (unsigned i = 0; i < NumElemVT; ++i) { - unsigned Channel, PtrIncr; - getStackAddress(StackWidth, i, Channel, PtrIncr); - Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, - DAG.getConstant(PtrIncr, DL, MVT::i32)); - SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, - Value, DAG.getConstant(i, DL, MVT::i32)); - - Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, - Chain, Elem, Ptr, - DAG.getTargetConstant(Channel, DL, MVT::i32)); - } - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); - } else { - if (ValueVT == MVT::i8) { - Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); - } - Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, - DAG.getTargetConstant(0, DL, MVT::i32)); // Channel + // Standard i32+ store, tag it with DWORDADDR to note that the address + // has been shifted + if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); + return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); } - return Chain; + // Tagged i32+ stores will be matched by patterns + return SDValue(); } // return (512 + (kc_bank << 12) @@ -1299,51 +1320,50 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op, LoadSDNode *Load = cast(Op); ISD::LoadExtType ExtType = Load->getExtensionType(); EVT MemVT = Load->getMemoryVT(); + assert(Load->getAlignment() >= MemVT.getStoreSize()); - // getBasePtr(); + SDValue Chain = Load->getChain(); + SDValue Offset = Load->getOffset(); - // Get Register holding the target. - SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), - DAG.getConstant(2, DL, MVT::i32)); - // Load the Register. 
- SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), - Load->getChain(), - Ptr, - DAG.getTargetConstant(0, DL, MVT::i32), - Op.getOperand(2)); + SDValue LoadPtr = BasePtr; + if (!Offset.isUndef()) { + LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); + } + + // Get dword location + // NOTE: this should be eliminated by the future SHR ptr, 2 + SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, + DAG.getConstant(0xfffffffc, DL, MVT::i32)); + + // Load dword + // TODO: can we be smarter about machine pointer info? + SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo()); // Get offset within the register. SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, - Load->getBasePtr(), - DAG.getConstant(0x3, DL, MVT::i32)); + LoadPtr, DAG.getConstant(0x3, DL, MVT::i32)); // Bit offset of target byte (byteIdx * 8). SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, DAG.getConstant(3, DL, MVT::i32)); // Shift to the right. - Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt); + SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt); // Eliminate the upper bits by setting them to ... EVT MemEltVT = MemVT.getScalarType(); - // ... ones. - if (ExtType == ISD::SEXTLOAD) { + if (ExtType == ISD::SEXTLOAD) { // ... ones. SDValue MemEltVTNode = DAG.getValueType(MemEltVT); - - SDValue Ops[] = { - DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode), - Load->getChain() - }; - - return DAG.getMergeValues(Ops, DL); + Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode); + } else { // ... or zeros. + Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT); } - // ... or zeros. SDValue Ops[] = { - DAG.getZeroExtendInReg(Ret, DL, MemEltVT), - Load->getChain() + Ret, + Read.getValue(1) // This should be our output chain }; return DAG.getMergeValues(Ops, DL); @@ -1365,12 +1385,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { - SDValue MergedValues[2] = { - scalarizeVectorLoad(LoadNode, DAG), - Chain - }; - return DAG.getMergeValues(MergedValues, DL); + if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && + VT.isVector()) { + return scalarizeVectorLoad(LoadNode, DAG); } int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); @@ -1421,8 +1439,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(MergedValues, DL); } - SDValue LoweredLoad; - // For most operations returning SDValue() will result in the node being // expanded by the DAG Legalizer. 
This is not the case for ISD::LOAD, so we // need to manually expand loads that may be legal in some address spaces and @@ -1447,47 +1463,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } - // Lowering for indirect addressing - const MachineFunction &MF = DAG.getMachineFunction(); - const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); - unsigned StackWidth = TFL->getStackWidth(MF); - - Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); - - if (VT.isVector()) { - unsigned NumElemVT = VT.getVectorNumElements(); - EVT ElemVT = VT.getVectorElementType(); - SDValue Loads[4]; - - assert(NumElemVT <= 4); - assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " - "vector width in load"); - - for (unsigned i = 0; i < NumElemVT; ++i) { - unsigned Channel, PtrIncr; - getStackAddress(StackWidth, i, Channel, PtrIncr); - Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, - DAG.getConstant(PtrIncr, DL, MVT::i32)); - Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, - Chain, Ptr, - DAG.getTargetConstant(Channel, DL, MVT::i32), - Op.getOperand(2)); - } - EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT); - LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT)); - } else { - LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, - Chain, Ptr, - DAG.getTargetConstant(0, DL, MVT::i32), // Channel - Op.getOperand(2)); + // DWORDADDR ISD marks already shifted address + if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { + assert(VT == MVT::i32); + Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr); + return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand()); } - - SDValue Ops[2] = { - LoweredLoad, - Chain - }; - - return DAG.getMergeValues(Ops, DL); + return SDValue(); } SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 3a72e0791fd6..19795bdde647 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -1268,6 +1268,17 @@ let Predicates = [isR600] in { defm R600_ : RegisterLoadStore ; +// Hardcode channel to 0 +// NOTE: LSHR is not available here. 
LSHR is per family instruction +def : Pat < + (i32 (load_private ADDRIndirect:$addr) ), + (R600_RegisterLoad FRAMEri:$addr, (i32 0)) +>; +def : Pat < + (store_private i32:$val, ADDRIndirect:$addr), + (R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0)) +>; + //===----------------------------------------------------------------------===// // Pseudo instructions diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index c78e97dfd46f..9140fe6cd148 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -99,6 +99,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v16i32, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); + setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand); + setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); + setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand); @@ -699,7 +711,8 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG, SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain, - unsigned Offset, bool Signed) const { + unsigned Offset, bool Signed, + const ISD::InputArg *Arg) const { const DataLayout &DL = DAG.getDataLayout(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); @@ -713,20 +726,21 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); - SDValue Val; + SDValue Val = Load; + if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) && + VT.bitsLT(MemVT)) { + unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext; + Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT)); + } + if (MemVT.isFloatingPoint()) - Val = getFPExtOrFPTrunc(DAG, Load, SL, VT); + Val = getFPExtOrFPTrunc(DAG, Val, SL, VT); else if (Signed) - Val = DAG.getSExtOrTrunc(Load, SL, VT); + Val = DAG.getSExtOrTrunc(Val, SL, VT); else - Val = DAG.getZExtOrTrunc(Load, SL, VT); - - SDValue Ops[] = { - Val, - Load.getValue(1) - }; + Val = DAG.getZExtOrTrunc(Val, SL, VT); - return DAG.getMergeValues(Ops, SL); + return DAG.getMergeValues({ Val, Load.getValue(1) }, SL); } SDValue SITargetLowering::LowerFormalArguments( @@ -899,7 +913,8 @@ SDValue SITargetLowering::LowerFormalArguments( // The first 36 bytes of the input buffer contains information about // thread group and global sizes. 
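      // Editorial note (not from the patch): passing &Ins[i] below lets
      // LowerParameter (changed above) wrap the parameter load in an
      // AssertZext/AssertSext node recording the original narrow type, e.g.
      //   AssertZext (load i8 ...), ValueType:i1
      // so later DAG combines can drop redundant re-extensions.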
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain, - Offset, Ins[i].Flags.isSExt()); + Offset, Ins[i].Flags.isSExt(), + &Ins[i]); Chains.push_back(Arg.getValue(1)); auto *ParamTy = diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 9583f6db6faa..6c04e4f30977 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -24,7 +24,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, unsigned Offset) const; SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, - SDValue Chain, unsigned Offset, bool Signed) const; + SDValue Chain, unsigned Offset, bool Signed, + const ISD::InputArg *Arg = nullptr) const; SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, diff --git a/lib/Target/AVR/AVRISelDAGToDAG.cpp b/lib/Target/AVR/AVRISelDAGToDAG.cpp index 156a21dfecfe..462a7d57d2de 100644 --- a/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ b/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -203,8 +203,8 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD, bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode, std::vector &OutOps) { - assert(ConstraintCode == InlineAsm::Constraint_m || - ConstraintCode == InlineAsm::Constraint_Q && + assert((ConstraintCode == InlineAsm::Constraint_m || + ConstraintCode == InlineAsm::Constraint_Q) && "Unexpected asm memory constraint"); MachineRegisterInfo &RI = MF->getRegInfo(); diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index 53668f05b59b..07fc3f6890b8 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -14,6 +14,7 @@ #include "AVRISelLowering.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -1933,5 +1934,45 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +unsigned AVRTargetLowering::getRegisterByName(const char *RegName, + EVT VT, + SelectionDAG &DAG) const { + unsigned Reg; + + if (VT == MVT::i8) { + Reg = StringSwitch(RegName) + .Case("r0", AVR::R0).Case("r1", AVR::R1).Case("r2", AVR::R2) + .Case("r3", AVR::R3).Case("r4", AVR::R4).Case("r5", AVR::R5) + .Case("r6", AVR::R6).Case("r7", AVR::R7).Case("r8", AVR::R8) + .Case("r9", AVR::R9).Case("r10", AVR::R10).Case("r11", AVR::R11) + .Case("r12", AVR::R12).Case("r13", AVR::R13).Case("r14", AVR::R14) + .Case("r15", AVR::R15).Case("r16", AVR::R16).Case("r17", AVR::R17) + .Case("r18", AVR::R18).Case("r19", AVR::R19).Case("r20", AVR::R20) + .Case("r21", AVR::R21).Case("r22", AVR::R22).Case("r23", AVR::R23) + .Case("r24", AVR::R24).Case("r25", AVR::R25).Case("r26", AVR::R26) + .Case("r27", AVR::R27).Case("r28", AVR::R28).Case("r29", AVR::R29) + .Case("r30", AVR::R30).Case("r31", AVR::R31) + .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30) + .Default(0); + } else { + Reg = StringSwitch(RegName) + .Case("r0", AVR::R1R0).Case("r2", AVR::R3R2) + .Case("r4", AVR::R5R4).Case("r6", AVR::R7R6) + .Case("r8", AVR::R9R8).Case("r10", AVR::R11R10) + .Case("r12", AVR::R13R12).Case("r14", AVR::R15R14) + .Case("r16", AVR::R17R16).Case("r18", AVR::R19R18) + .Case("r20", 
AVR::R21R20).Case("r22", AVR::R23R22) + .Case("r24", AVR::R25R24).Case("r26", AVR::R27R26) + .Case("r28", AVR::R29R28).Case("r30", AVR::R31R30) + .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30) + .Default(0); + } + + if (Reg) + return Reg; + + report_fatal_error("Invalid register name global variable"); +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h index 17074e1b1eee..a8cdc4e7ae23 100644 --- a/lib/Target/AVR/AVRISelLowering.h +++ b/lib/Target/AVR/AVRISelLowering.h @@ -116,6 +116,9 @@ public: std::vector &Ops, SelectionDAG &DAG) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; + private: SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc, SelectionDAG &DAG, SDLoc dl) const; diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp index cbe4466164f9..e38facead922 100644 --- a/lib/Target/BPF/BPFInstrInfo.cpp +++ b/lib/Target/BPF/BPFInstrInfo.cpp @@ -13,15 +13,13 @@ #include "BPF.h" #include "BPFInstrInfo.h" -#include "BPFSubtarget.h" -#include "BPFTargetMachine.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" +#include +#include #define GET_INSTRINFO_CTOR_DTOR #include "BPFGenInstrInfo.inc" @@ -109,11 +107,11 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB, while (std::next(I) != MBB.end()) std::next(I)->eraseFromParent(); Cond.clear(); - FBB = 0; + FBB = nullptr; // Delete the J if it's equivalent to a fall-through. 
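    // Editorial aside (not from the patch): per the analyzeBranch contract,
    // returning false with TBB == nullptr means the block falls through,
    // which is why an equivalent unconditional jump is simply erased below.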
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); continue; diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index b0037fbc16ac..9beefcdcc1d5 100644 --- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -12,16 +12,15 @@ //===----------------------------------------------------------------------===// #include "BPF.h" -#include "BPFRegisterInfo.h" #include "BPFSubtarget.h" #include "MCTargetDesc/BPFMCTargetDesc.h" - +#include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" +#include using namespace llvm; @@ -36,14 +35,15 @@ class BPFDisassembler : public MCDisassembler { public: BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~BPFDisassembler() {} + ~BPFDisassembler() override = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; }; -} + +} // end anonymous namespace static MCDisassembler *createBPFDisassembler(const Target &T, const MCSubtargetInfo &STI, diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index a6cd2002c12c..afc321ea2c34 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -8,28 +8,24 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCDirectives.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; namespace { + class BPFAsmBackend : public MCAsmBackend { public: bool IsLittleEndian; BPFAsmBackend(bool IsLittleEndian) : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {} - ~BPFAsmBackend() override {} + ~BPFAsmBackend() override = default; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; @@ -53,6 +49,8 @@ public: bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; }; +} // end anonymous namespace + bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { if ((Count % 8) != 0) return false; @@ -66,7 +64,6 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { - if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { @@ -92,7 +89,6 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createBPFELFObjectWriter(OS, 0, 
IsLittleEndian); } -} MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 3d1c0eb55afa..ebe9abd8ffac 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -10,29 +10,30 @@ #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include using namespace llvm; namespace { + class BPFELFObjectWriter : public MCELFObjectTargetWriter { public: BPFELFObjectWriter(uint8_t OSABI); - - ~BPFELFObjectWriter() override; + ~BPFELFObjectWriter() override = default; protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; }; -} + +} // end anonymous namespace BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_BPF, /*HasRelocationAddend*/ false) {} -BPFELFObjectWriter::~BPFELFObjectWriter() {} - unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index 47f16512a397..e8c974479828 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -12,24 +12,25 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" -#include "llvm/Support/raw_ostream.h" +#include +#include + using namespace llvm; #define DEBUG_TYPE "mccodeemitter" namespace { + class BPFMCCodeEmitter : public MCCodeEmitter { - BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete; - void operator=(const BPFMCCodeEmitter &) = delete; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; bool IsLittleEndian; @@ -38,8 +39,9 @@ public: BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, bool IsLittleEndian) : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {} - - ~BPFMCCodeEmitter() {} + BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete; + void operator=(const BPFMCCodeEmitter &) = delete; + ~BPFMCCodeEmitter() override = default; // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. 
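// Editorial aside (not from the patch): declaring the deleted copy
// operations in the public section, as done above, follows the usual LLVM
// idiom; misuse then diagnoses as a call to a deleted function rather than
// as an access violation of a private member.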
@@ -66,7 +68,8 @@ private: void verifyInstructionPredicates(const MCInst &MI, uint64_t AvailableFeatures) const; }; -} + +} // end anonymous namespace MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 55415f97396b..b58409730de0 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -12,14 +12,13 @@ //===----------------------------------------------------------------------===// #include "BPF.h" -#include "BPFMCTargetDesc.h" -#include "BPFMCAsmInfo.h" #include "InstPrinter/BPFInstPrinter.h" +#include "MCTargetDesc/BPFMCTargetDesc.h" +#include "MCTargetDesc/BPFMCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC @@ -64,7 +63,7 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new BPFInstPrinter(MAI, MII, MRI); - return 0; + return nullptr; } extern "C" void LLVMInitializeBPFTargetMC() { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 5fb5b0227800..df12e0e88e3b 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -101,7 +101,7 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { } LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, - const char* Triple, const char* CPU, const char* Features, + const char *Triple, const char *CPU, const char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel) { Optional RM; @@ -139,7 +139,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, TargetOptions opt; return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, - CM, OL)); + CM, OL)); } void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); } diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index f4d46383e5bb..d9c53ecc8d08 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyExplicitLocals.cpp WebAssemblyFastISel.cpp WebAssemblyFixIrreducibleControlFlow.cpp + WebAssemblyFixFunctionBitcasts.cpp WebAssemblyFrameLowering.cpp WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index 09c35b4825fc..8738263ad847 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -28,6 +28,7 @@ class FunctionPass; // LLVM IR passes. ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj); void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); +ModulePass *createWebAssemblyFixFunctionBitcasts(); FunctionPass *createWebAssemblyOptimizeReturned(); // ISel and immediate followup passes. 
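The new WebAssemblyFixFunctionBitcasts pass introduced below rewrites calls made through bitcast function pointers whose type disagrees with the callee. As a minimal, hypothetical IR sketch (the function names are invented for illustration and are not part of this patch), the kind of input the pass targets looks like:

    ; @callee is called through a bitcast with a mismatched signature.
    ; WebAssembly requires call-site and callee signatures to match exactly,
    ; so the pass reroutes the call through a generated "bitcast" wrapper of
    ; type void () that calls @callee with an undef i32 argument.
    define void @callee(i32 %x) {
      ret void
    }

    define void @caller() {
      call void bitcast (void (i32)* @callee to void ()*)()
      ret void
    }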
diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
new file mode 100644
index 000000000000..d5474a02ce01
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -0,0 +1,159 @@
+//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Fix bitcasted functions.
+///
+/// WebAssembly requires caller and callee signatures to match; however, in
+/// LLVM, some amount of slop is vaguely permitted. Detect mismatch by looking
+/// for bitcasts of functions and rewrite them to use wrapper functions
+/// instead.
+///
+/// This doesn't catch all cases, such as when a function's address is taken in
+/// one place and cast in another, but it works for many common cases.
+///
+/// Note that LLVM already optimizes away function bitcasts in common cases by
+/// dropping arguments as needed, so this pass only ends up getting used in
+/// less common cases.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fix-function-bitcasts"
+
+namespace {
+class FixFunctionBitcasts final : public ModulePass {
+  StringRef getPassName() const override {
+    return "WebAssembly Fix Function Bitcasts";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  bool runOnModule(Module &M) override;
+
+public:
+  static char ID;
+  FixFunctionBitcasts() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char FixFunctionBitcasts::ID = 0;
+ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
+  return new FixFunctionBitcasts();
+}
+
+// Recursively descend the def-use lists from V to find non-bitcast users of
+// bitcasts of V.
+static void FindUses(Value *V, Function &F,
+                     SmallVectorImpl<std::pair<Use *, Function *>> &Uses) {
+  for (Use &U : V->uses()) {
+    if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
+      FindUses(BC, F, Uses);
+    else if (U.get()->getType() != F.getType())
+      Uses.push_back(std::make_pair(&U, &F));
+  }
+}
+
+// Create a wrapper function with type Ty that calls F (which may have a
+// different type). Attempt to support common bitcasted function idioms:
+//  - Call with more arguments than needed: arguments are dropped
+//  - Call with fewer arguments than needed: arguments are filled in with undef
+//  - Return value is not needed: drop it
+//  - Return value needed but not present: supply an undef
+//
+// For now, return nullptr without creating a wrapper if the wrapper cannot
+// be generated due to incompatible types.
+static Function *CreateWrapper(Function *F, FunctionType *Ty) {
+  Module *M = F->getParent();
+
+  Function *Wrapper =
+      Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
+  BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+
+  // Determine what arguments to pass.
+  SmallVector<Value *, 4> Args;
+  Function::arg_iterator AI = Wrapper->arg_begin();
+  FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
+  FunctionType::param_iterator PE = F->getFunctionType()->param_end();
+  for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) {
+    if (AI->getType() != *PI) {
+      Wrapper->eraseFromParent();
+      return nullptr;
+    }
+    Args.push_back(&*AI);
+  }
+  for (; PI != PE; ++PI)
+    Args.push_back(UndefValue::get(*PI));
+
+  CallInst *Call = CallInst::Create(F, Args, "", BB);
+
+  // Determine what value to return.
+  if (Ty->getReturnType()->isVoidTy())
+    ReturnInst::Create(M->getContext(), BB);
+  else if (F->getFunctionType()->getReturnType()->isVoidTy())
+    ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
+                       BB);
+  else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
+    ReturnInst::Create(M->getContext(), Call, BB);
+  else {
+    Wrapper->eraseFromParent();
+    return nullptr;
+  }
+
+  return Wrapper;
+}
+
+bool FixFunctionBitcasts::runOnModule(Module &M) {
+  SmallVector<std::pair<Use *, Function *>, 0> Uses;
+
+  // Collect all the places that need wrappers.
+  for (Function &F : M)
+    FindUses(&F, F, Uses);
+
+  DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;
+
+  for (auto &UseFunc : Uses) {
+    Use *U = UseFunc.first;
+    Function *F = UseFunc.second;
+    PointerType *PTy = cast<PointerType>(U->get()->getType());
+    FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());
+
+    // If the function is cast to something like i8* as a "generic pointer"
+    // to be later cast to something else, we can't generate a wrapper for
+    // it. Just ignore such casts for now.
+    if (!Ty)
+      continue;
+
+    auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
+    if (Pair.second)
+      Pair.first->second = CreateWrapper(F, Ty);
+
+    Function *Wrapper = Pair.first->second;
+    if (!Wrapper)
+      continue;
+
+    if (isa<Constant>(U->get()))
+      U->get()->replaceAllUsesWith(Wrapper);
+    else
+      U->set(Wrapper);
+  }
+
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 8a3248ee669e..e872dc219846 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -40,8 +40,8 @@ defm ROTL : BinaryInt;
 defm ROTR : BinaryInt;
 
 let isCommutable = 1 in {
-defm EQ : ComparisonInt;
-defm NE : ComparisonInt;
+defm EQ : ComparisonInt;
+defm NE : ComparisonInt;
 } // isCommutable = 1
 defm LT_S : ComparisonInt;
 defm LT_U : ComparisonInt;
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index b61bc0a08143..f5ef35a2ad40 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -163,6 +163,10 @@ void WebAssemblyPassConfig::addIRPasses() {
   // control specifically what gets lowered.
   addPass(createAtomicExpandPass(TM));
 
+  // Fix function bitcasts, as WebAssembly requires caller and callee
+  // signatures to match.
+  addPass(createWebAssemblyFixFunctionBitcasts());
+
   // Optimize "returned" function attributes.
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createWebAssemblyOptimizeReturned());
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7f72ab17f619..db76ddf04c06 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6962,23 +6962,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
 }
 
-/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
-/// node.
-static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
-                             const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+/// Returns true iff \p BV builds a vector with the result equivalent to
+/// the result of an ADDSUB operation.
+/// If true is returned then the operands of the ADDSUB = Opnd0 +- Opnd1
+/// operation are written to the parameters \p Opnd0 and \p Opnd1.
+static bool isAddSub(const BuildVectorSDNode *BV,
+                     const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                     SDValue &Opnd0, SDValue &Opnd1) {
+  MVT VT = BV->getSimpleValueType(0);
   if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
-      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
-    return SDValue();
+      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
+      (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
+    return false;
 
-  SDLoc DL(BV);
   unsigned NumElts = VT.getVectorNumElements();
   SDValue InVec0 = DAG.getUNDEF(VT);
   SDValue InVec1 = DAG.getUNDEF(VT);
 
-  assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
-          VT == MVT::v2f64) && "build_vector with an invalid type found!");
-
   // Odd-numbered elements in the input build vector are obtained from
   // adding two integer/float elements.
   // Even-numbered elements in the input build vector are obtained from
@@ -7000,7 +7001,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
 
     // Early exit if we found an unexpected opcode.
     if (Opcode != ExpectedOpcode)
-      return SDValue();
+      return false;
 
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
@@ -7013,11 +7014,11 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
         Op0.getOperand(1) != Op1.getOperand(1))
-      return SDValue();
+      return false;
 
     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
     if (I0 != i)
-      return SDValue();
+      return false;
 
     // We found a valid add/sub node. Update the information accordingly.
    if (i & 1)
@@ -7029,39 +7030,118 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
     if (InVec0.isUndef()) {
       InVec0 = Op0.getOperand(0);
       if (InVec0.getSimpleValueType() != VT)
-        return SDValue();
+        return false;
     }
     if (InVec1.isUndef()) {
       InVec1 = Op1.getOperand(0);
       if (InVec1.getSimpleValueType() != VT)
-        return SDValue();
+        return false;
     }
 
     // Make sure that operands in input to each add/sub node always
     // come from a same pair of vectors.
     if (InVec0 != Op0.getOperand(0)) {
       if (ExpectedOpcode == ISD::FSUB)
-        return SDValue();
+        return false;
 
       // FADD is commutable. Try to commute the operands
       // and then test again.
       std::swap(Op0, Op1);
       if (InVec0 != Op0.getOperand(0))
-        return SDValue();
+        return false;
     }
 
     if (InVec1 != Op1.getOperand(0))
-      return SDValue();
+      return false;
 
     // Update the pair of expected opcodes.
     std::swap(ExpectedOpcode, NextExpectedOpcode);
   }
 
   // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
-  if (AddFound && SubFound && !InVec0.isUndef() && !InVec1.isUndef())
-    return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
+  if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
+    return false;
 
-  return SDValue();
+  Opnd0 = InVec0;
+  Opnd1 = InVec1;
+  return true;
+}
+
+/// Returns true if it is possible to fold MUL and an idiom that has already
+/// been recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
+/// If (and only if) true is returned, the operands of FMADDSUB are written to
+/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
+///
+/// Prior to calling this function it should be known that there is some
+/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
+/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
+/// before replacement of such SDNode with ADDSUB operation. Thus the number
+/// of \p Opnd0 uses is expected to be equal to 2.
+/// For example, this function may be called for the following IR:
+///    %AB = fmul fast <2 x double> %A, %B
+///    %Sub = fsub fast <2 x double> %AB, %C
+///    %Add = fadd fast <2 x double> %AB, %C
+///    %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
+///                            <2 x i32> <i32 0, i32 3>
+/// There is a def for %Addsub here, which potentially can be replaced by
+/// X86ISD::ADDSUB operation:
+///    %Addsub = X86ISD::ADDSUB %AB, %C
+/// and such ADDSUB can further be replaced with FMADDSUB:
+///    %Addsub = FMADDSUB %A, %B, %C.
+///
+/// The main reason why this method is called before the replacement of the
+/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
+/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
+/// FMADDSUB is.
+static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                       SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
+  if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
+      !Subtarget.hasAnyFMA())
+    return false;
+
+  // FIXME: These checks must match the similar ones in
+  // DAGCombiner::visitFADDForFMACombine. It would be good to have one
+  // function that would answer if it is Ok to fuse MUL + ADD to FMADD
+  // or MUL + ADDSUB to FMADDSUB.
+  const TargetOptions &Options = DAG.getTarget().Options;
+  bool AllowFusion =
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+  if (!AllowFusion)
+    return false;
+
+  Opnd2 = Opnd1;
+  Opnd1 = Opnd0.getOperand(1);
+  Opnd0 = Opnd0.getOperand(0);
+
+  return true;
+}
+
+/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub'
+/// operation into an X86ISD::ADDSUB or X86ISD::FMADDSUB node.
+static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
+                                       const X86Subtarget &Subtarget,
+                                       SelectionDAG &DAG) {
+  SDValue Opnd0, Opnd1;
+  if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
+    return SDValue();
+
+  MVT VT = BV->getSimpleValueType(0);
+  SDLoc DL(BV);
+
+  // Try to generate X86ISD::FMADDSUB node here.
+  SDValue Opnd2;
+  if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
+    return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+
+  // Do not generate X86ISD::ADDSUB node for 512-bit types even though
+  // the ADDSUB idiom has been successfully recognized. There are no known
+  // X86 targets with 512-bit ADDSUB instructions!
+  // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB
+  // idiom recognition.
+  if (VT.is512BitVector())
     return SDValue();
 
+  return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
 }
 
 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
@@ -7290,7 +7370,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return VectorConstant;
 
   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
-  if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))
+  if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
     return AddSub;
   if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
     return HorizontalOp;
@@ -12965,6 +13045,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   if (Subtarget.hasVBMI())
     return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
 
+  // Try to create an in-lane repeating shuffle mask and then shuffle the
+  // results into the target lanes.
+  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
+          DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
+    return V;
+
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
 }
@@ -16985,9 +17071,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
   }
 
-  if (Cond.getOpcode() == ISD::SETCC)
-    if (SDValue NewCond = LowerSETCC(Cond, DAG))
+  if (Cond.getOpcode() == ISD::SETCC) {
+    if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
       Cond = NewCond;
+      // If the condition was updated, it's possible that the operands of the
+      // select were also updated (for example, EmitTest has a RAUW). Refresh
+      // the local references to the select operands in case they got stale.
+      Op1 = Op.getOperand(1);
+      Op2 = Op.getOperand(2);
+    }
+  }
 
   // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
   // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
@@ -17193,22 +17286,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
   if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
     return SDValue();
 
-  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
+  if (VT.is512BitVector() && InVTElt != MVT::i1) {
     if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
       return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
   }
 
-  assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+  assert (InVTElt == MVT::i1 && "Unexpected vector type");
   MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
-  SDValue NegOne = DAG.getConstant(
-      APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
-  SDValue Zero = DAG.getConstant(
-      APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
+  SDValue V;
+  if (Subtarget.hasDQI()) {
+    V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
+    assert(!VT.is512BitVector() && "Unexpected vector type");
+  } else {
+    SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
+    SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
+    V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
+    if (VT.is512BitVector())
+      return V;
+  }
 
-  SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
-  if (VT.is512BitVector())
-    return V;
   return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
 }
 
@@ -21528,6 +21625,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
   }
 
+  // It's worth extending once and using the vXi16/vXi32 shifts for smaller
+  // types, but without AVX512 the extra overheads to get from vXi8 to vXi32
+  // make the existing SSE solution better.
+  if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
+      (Subtarget.hasAVX512() && VT == MVT::v16i16) ||
+      (Subtarget.hasAVX512() && VT == MVT::v16i8) ||
+      (Subtarget.hasBWI() && VT == MVT::v32i8)) {
+    MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
+    MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
+    unsigned ExtOpc =
+        Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    R = DAG.getNode(ExtOpc, dl, ExtVT, R);
+    Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                       DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+  }
+
   if (VT == MVT::v16i8 ||
       (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
     MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
@@ -21636,19 +21750,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     }
   }
 
-  // It's worth extending once and using the v8i32 shifts for 16-bit types, but
-  // the extra overheads to get from v16i8 to v8i32 make the existing SSE
-  // solution better.
-  if (Subtarget.hasInt256() && VT == MVT::v8i16) {
-    MVT ExtVT = MVT::v8i32;
-    unsigned ExtOpc =
-        Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
-    R = DAG.getNode(ExtOpc, dl, ExtVT, R);
-    Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
-    return DAG.getNode(ISD::TRUNCATE, dl, VT,
-                       DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
-  }
-
   if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
     SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
@@ -27763,29 +27864,32 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// \brief Try to combine a shuffle into a target-specific add-sub node.
+/// Returns true iff the shuffle node \p N can be replaced with an ADDSUB
+/// operation. If true is returned then the operands of the ADDSUB operation
+/// are written to the parameters \p Opnd0 and \p Opnd1.
 ///
-/// We combine this directly on the abstract vector shuffle nodes so it is
-/// easier to generically match. We also insert dummy vector shuffle nodes for
-/// the operands which explicitly discard the lanes which are unused by this
-/// operation to try to flow through the rest of the combiner the fact that
-/// they're unused.
-static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
-                                      SelectionDAG &DAG) {
-  SDLoc DL(N);
+/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes
+/// so it is easier to generically match. We also insert dummy vector shuffle
+/// nodes for the operands which explicitly discard the lanes which are unused
+/// by this operation to try to flow through the rest of the combiner
+/// the fact that they're unused.
+static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
+                     SDValue &Opnd0, SDValue &Opnd1) {
+  EVT VT = N->getValueType(0);
   if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
-      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
-    return SDValue();
+      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
+      (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
+    return false;
 
   // We only handle target-independent shuffles.
   // FIXME: It would be easy and harmless to use the target shuffle mask
   // extraction tool to support more.
   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
-    return SDValue();
+    return false;
 
   ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
-  SmallVector<int, 8> Mask(OrigMask.begin(), OrigMask.end());
+  SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
 
   SDValue V1 = N->getOperand(0);
   SDValue V2 = N->getOperand(1);
@@ -27796,27 +27900,57 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
     ShuffleVectorSDNode::commuteMask(Mask);
     std::swap(V1, V2);
   } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
-    return SDValue();
+    return false;
 
   // If there are other uses of these operations we can't fold them.
   if (!V1->hasOneUse() || !V2->hasOneUse())
-    return SDValue();
+    return false;
 
   // Ensure that both operations have the same operands. Note that we can
   // commute the FADD operands.
   SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
   if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
       (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
-    return SDValue();
+    return false;
 
   // We're looking for blends between FADD and FSUB nodes. We insist on these
   // nodes being lined up in a specific expected pattern.
   if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
         isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
-        isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
+        isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
+        isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
+                                           8, 25, 10, 27, 12, 29, 14, 31})))
+    return false;
+
+  Opnd0 = LHS;
+  Opnd1 = RHS;
+  return true;
+}
+
+/// \brief Try to combine a shuffle into a target-specific add-sub or
+/// mul-add-sub node.
+static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
+                                                const X86Subtarget &Subtarget,
+                                                SelectionDAG &DAG) {
+  SDValue Opnd0, Opnd1;
+  if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // Try to generate X86ISD::FMADDSUB node here.
+  SDValue Opnd2;
+  if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
+    return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+
+  // Do not generate X86ISD::ADDSUB node for 512-bit types even though
+  // the ADDSUB idiom has been successfully recognized. There are no known
+  // X86 targets with 512-bit ADDSUB instructions!
+  if (VT.is512BitVector())
     return SDValue();
 
-  return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
+  return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
 }
 
 // We are looking for a shuffle where both sources are concatenated with undef
@@ -27878,7 +28012,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
   // If we have legalized the vector types, look for blends of FADD and FSUB
   // nodes that we can fuse into an ADDSUB node.
   if (TLI.isTypeLegal(VT))
-    if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
+    if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
       return AddSub;
 
   // During Type Legalization, when promoting illegal vector types,
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 908053e1342d..d44d1395f243 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -443,6 +443,22 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
 }
 
+// Alias instructions that allow VPTERNLOG to be used with a mask to create
+// a mix of all ones and all zeros elements. This is done this way to force
+// the same register to be used as input for all three sources.
+let isPseudo = 1, Predicates = [HasAVX512] in {
+def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
+                                (ins VK16WM:$mask), "",
+                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
+                                              (v16i32 immAllOnesV),
+                                              (v16i32 immAllZerosV)))]>;
+def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
+                                (ins VK8WM:$mask), "",
+                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
+                                           (bc_v8i64 (v16i32 immAllOnesV)),
+                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
+}
+
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
@@ -1064,10 +1080,10 @@ def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
                     (v8f32 VR256X:$src), 1)>;
 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
-                     (v4f64 VR256X:$src), 1)>;
+                    (v4f64 VR256X:$src), 1)>;
 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
-                     (v4i64 VR256X:$src), 1)>;
+                    (v4i64 VR256X:$src), 1)>;
 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                     (v8i32 VR256X:$src), 1)>;
@@ -1485,8 +1501,7 @@ defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
 // AVX-512 - BLEND using mask
 //
 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
-  let ExeDomain = _.ExeDomain in {
-  let hasSideEffects = 0 in
+  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
   def rr : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _> {
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
-            [(set _.RC:$dst, (vselect _.KRCWM:$mask,
-                                (_.VT _.RC:$src2),
-                                (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
-  let hasSideEffects = 0 in
+            []>, EVEX_4V, EVEX_K;
   def rrkz : AVX5128I, EVEX_4V, EVEX_KZ;
-  let mayLoad = 1, hasSideEffects = 0 in
+  let mayLoad = 1 in {
   def rm : AVX5128I opc, string OpcodeStr, X86VectorVTInfo _> {
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
-            [(set _.RC:$dst, (vselect _.KRCWM:$mask,
-                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
-                              (_.VT _.RC:$src1)))]>,
-            EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
-  let mayLoad = 1, hasSideEffects = 0 in
+            []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
   def rmkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
   }
+  }
 }
 multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+  let mayLoad = 1, hasSideEffects = 0 in {
   def rmbk : AVX5128I,
-              EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+              []>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
-  let mayLoad = 1, hasSideEffects = 0 in
   def rmb : AVX5128I, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
-
+  }
 }
 
 multiclass blendmask_dq<bits<8> opc, string OpcodeStr,
@@ -1582,21 +1588,6 @@ defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
 
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
-                          (v8f32 VR256X:$src2))),
-          (EXTRACT_SUBREG
-           (v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-                    (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-                    (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
-           sub_ymm)>;
-
-def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
-                          (v8i32 VR256X:$src2))),
-          (EXTRACT_SUBREG
-           (v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-                    (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-                    (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
-           sub_ymm)>;
-}
 //===----------------------------------------------------------------------===//
 // Compare Instructions
 //===----------------------------------------------------------------------===//
@@ -2735,7 +2726,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                     (ins _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                      "${dst} {${mask}} {z}, $src}"),
-                    [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+                    [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                       (_.VT _.RC:$src),
                                       _.ImmAllZerosV)))], _.ExeDomain>,
                     EVEX, EVEX_KZ;
@@ -2972,6 +2963,30 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                            (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
 
+// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
+// available. Use a 512-bit operation and extract.
+let Predicates = [HasAVX512, NoVLX] in {
+def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
+                          (v8f32 VR256X:$src0))),
+          (EXTRACT_SUBREG
+           (v16f32
+            (VMOVAPSZrrk
+             (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
+             (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+             (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
+           sub_ymm)>;
+
+def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
+                          (v8i32 VR256X:$src0))),
+          (EXTRACT_SUBREG
+           (v16i32
+            (VMOVDQA32Zrrk
+             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
+             (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
+           sub_ymm)>;
+}
+
 let Predicates = [HasVLX, NoBWI] in {
   // 128-bit load/store without BWI.
   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
@@ -3116,13 +3131,13 @@ let Predicates = [HasVLX] in {
            (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
 }
-
-// Move Int Doubleword to Packed Double Int
-//
-let ExeDomain = SSEPackedInt in {
-def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
+
+// Move Int Doubleword to Packed Double Int
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                         EVEX;
 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
@@ -3152,47 +3167,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
-                     IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
-                     EVEX_CD8<64, CD8VT1>;
-}
-} // ExeDomain = SSEPackedInt
-
-// Move Int Doubleword to Single Scalar
-//
-let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
-def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert GR32:$src))],
+                     IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
+                     EVEX_CD8<64, CD8VT1>;
+}
+} // ExeDomain = SSEPackedInt
+
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert GR32:$src))],
                       IIC_SSE_MOVDQ>, EVEX;
 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
-                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
-
-// Move doubleword from xmm register to r/m32
-//
-let ExeDomain = SSEPackedInt in {
-def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
+                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+// Move doubleword from xmm register to r/m32
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                          (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
                       EVEX;
 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
-                      [(store (i32 (extractelt (v4i32 VR128X:$src),
-                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
-                      EVEX, EVEX_CD8<32, CD8VT1>;
-} // ExeDomain = SSEPackedInt
-
-// Move quadword from xmm1 register to r/m64
-//
-let ExeDomain = SSEPackedInt in {
-def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
-                      "vmovq\t{$src, $dst|$dst, $src}",
-                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
+                      [(store (i32 (extractelt (v4i32 VR128X:$src),
+                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+                      EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt
+
+// Move quadword from xmm1 register to r/m64
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
                       Requires<[HasAVX512, In64BitMode]>;
@@ -3213,39 +3228,39 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
 
 let hasSideEffects = 0 in
 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
-                             (ins VR128X:$src),
-                             "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
-                             EVEX, VEX_W;
-} // ExeDomain = SSEPackedInt
-
-// Move Scalar Single to Double Int
-//
-let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
-def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
-                      (ins FR32X:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
+                             (ins VR128X:$src),
+                             "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
+                             EVEX, VEX_W;
+} // ExeDomain = SSEPackedInt
+
+// Move Scalar Single to Double Int
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
+                      (ins FR32X:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32X:$src))],
                       IIC_SSE_MOVD_ToGP>, EVEX;
 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, FR32X:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
-                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
-
-// Move Quadword Int to Packed Quadword Int
-//
-let ExeDomain = SSEPackedInt in {
-def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
-                      (ins i64mem:$src),
-                      "vmovq\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
-                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
-                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
-} // ExeDomain = SSEPackedInt
-
-//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
+                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+// Move Quadword Int to Packed Quadword Int
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                      (ins i64mem:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
+                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
+                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
+} // ExeDomain = SSEPackedInt
+
+//===----------------------------------------------------------------------===//
+// AVX-512 MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
 multiclass avx512_move_scalar
 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
+                   (v2f64 VR128X:$src0)),
+          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
+                   (bitconvert (v4i32 immAllZerosV))),
+          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
+
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+                   (v2f64 VR128X:$src0)),
+          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
+                           (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+                   (bitconvert (v4i32 immAllZerosV))),
+          (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+def : Pat<(vselect (v2i1 VK2WM:$mask),
+                   (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+                   (v2f64 VR128X:$src0)),
+          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+                   (bitconvert (v4i32 immAllZerosV))),
+          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 579359794fbd..e3484d062bc8 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -543,7 +543,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::MOV8rr,          X86::MOV8rm,          0 },
     { X86::MOVAPDrr,        X86::MOVAPDrm,        TB_ALIGN_16 },
     { X86::MOVAPSrr,        X86::MOVAPSrm,        TB_ALIGN_16 },
-    { X86::MOVDDUPrr,       X86::MOVDDUPrm,       0 },
+    { X86::MOVDDUPrr,       X86::MOVDDUPrm,       TB_NO_REVERSE },
     { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm,     0 },
     { X86::MOVDI2SSrr,      X86::MOVDI2SSrm,      0 },
    { X86::MOVDQArr,        X86::MOVDQArm,        TB_ALIGN_16 },
@@ -661,7 +661,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMOV64toSDrr,    X86::VMOV64toSDrm,    0 },
     { X86::VMOVAPDrr,       X86::VMOVAPDrm,       TB_ALIGN_16 },
     { X86::VMOVAPSrr,       X86::VMOVAPSrm,       TB_ALIGN_16 },
-    { X86::VMOVDDUPrr,      X86::VMOVDDUPrm,      0 },
+    { X86::VMOVDDUPrr,      X86::VMOVDDUPrm,      TB_NO_REVERSE },
     { X86::VMOVDI2PDIrr,    X86::VMOVDI2PDIrm,    0 },
     { X86::VMOVDI2SSrr,     X86::VMOVDI2SSrm,     0 },
     { X86::VMOVDQArr,       X86::VMOVDQArm,       TB_ALIGN_16 },
@@ -6864,6 +6864,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
         .addReg(Reg, RegState::Undef).addImm(0xff);
     return true;
   }
+  case X86::AVX512_512_SEXT_MASK_32:
+  case X86::AVX512_512_SEXT_MASK_64: {
+    unsigned Reg = MIB->getOperand(0).getReg();
+    unsigned MaskReg = MIB->getOperand(1).getReg();
+    unsigned MaskState = getRegState(MIB->getOperand(1));
+    unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
+                   X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
+    MI.RemoveOperand(1);
+    MIB->setDesc(get(Opc));
+    // VPTERNLOG needs 3 register inputs and an immediate.
+    // 0xff will return 1s for any input.
+    MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
+       .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
+    return true;
+  }
   case X86::VMOVAPSZ128rm_NOVLX:
     return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
                            get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 4cd6ae563f03..09971d586a41 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6397,7 +6397,7 @@ let Predicates = [HasAVX] in {
   defm VROUND  : sse41_fp_binop_s<0x0A, 0x0B, "vround",
                                   int_x86_sse41_round_ss,
                                   int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
-  defm VROUND  : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
+  defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
 }
 
 let Predicates = [UseAVX] in {
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index de4839432b9a..107ed9359376 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost(
   }
 
   static const CostTblEntry AVX512BWUniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v64i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v64i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v64i8,   4 }, // psrlw, pand, pxor, psubb.
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence }; @@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX2UniformConstCostTable[] = { + { ISD::SHL, MVT::v32i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence @@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry SSE2UniformConstCostTable[] = { + { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand). + { ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand). + { ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb). + { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence @@ -207,6 +223,43 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; } + static const CostTblEntry AVX2UniformCostTable[] = { + // Uniform splats are cheaper for the following instructions. + { ISD::SHL, MVT::v16i16, 1 }, // psllw. + { ISD::SRL, MVT::v16i16, 1 }, // psrlw. + { ISD::SRA, MVT::v16i16, 1 }, // psraw. + }; + + if (ST->hasAVX2() && + ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || + (Op2Info == TargetTransformInfo::OK_UniformValue))) { + if (const auto *Entry = + CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry SSE2UniformCostTable[] = { + // Uniform splats are cheaper for the following instructions. + { ISD::SHL, MVT::v8i16, 1 }, // psllw. + { ISD::SHL, MVT::v4i32, 1 }, // pslld + { ISD::SHL, MVT::v2i64, 1 }, // psllq. + + { ISD::SRL, MVT::v8i16, 1 }, // psrlw. + { ISD::SRL, MVT::v4i32, 1 }, // psrld. + { ISD::SRL, MVT::v2i64, 1 }, // psrlq. + + { ISD::SRA, MVT::v8i16, 1 }, // psraw. + { ISD::SRA, MVT::v4i32, 1 }, // psrad. + }; + + if (ST->hasSSE2() && + ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || + (Op2Info == TargetTransformInfo::OK_UniformValue))) { + if (const auto *Entry = + CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry AVX512DQCostTable[] = { { ISD::MUL, MVT::v2i64, 1 }, { ISD::MUL, MVT::v4i64, 1 }, @@ -219,6 +272,10 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; static const CostTblEntry AVX512BWCostTable[] = { + { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw + { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw + { ISD::SRA, MVT::v32i16, 1 }, // vpsravw + { ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence. { ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence. { ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence. @@ -259,7 +316,7 @@ int X86TTIImpl::getArithmeticInstrCost( if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry AVX2CostTable[] = { + static const CostTblEntry AVX2ShiftCostTable[] = { // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to // customize them to detect the cases where shift amount is a scalar one. { ISD::SHL, MVT::v4i32, 1 }, @@ -283,11 +340,11 @@ int X86TTIImpl::getArithmeticInstrCost( // is lowered into a vector multiply (vpmullw). 
return LT.first; - if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second)) + if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; } - static const CostTblEntry XOPCostTable[] = { + static const CostTblEntry XOPShiftCostTable[] = { // 128bit shifts take 1cy, but right shifts require negation beforehand. { ISD::SHL, MVT::v16i8, 1 }, { ISD::SRL, MVT::v16i8, 2 }, @@ -318,93 +375,20 @@ int X86TTIImpl::getArithmeticInstrCost( // Look for XOP lowering tricks. if (ST->hasXOP()) - if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second)) + if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry AVX2CustomCostTable[] = { - { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. - { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. - - { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence. - { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. - - { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. - { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. - { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. - { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. - - { ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence. - { ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence. - { ISD::MUL, MVT::v8i32, 1 }, // pmulld - { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add - - { ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ - { ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/ - { ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/ - }; - - // Look for AVX2 lowering tricks for custom cases. - if (ST->hasAVX2()) - if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD, - LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVXCustomCostTable[] = { - { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence. - - { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/ - - // Vectorizing division is a bad idea. See the SSE2 table for more comments. - { ISD::SDIV, MVT::v32i8, 32*20 }, - { ISD::SDIV, MVT::v16i16, 16*20 }, - { ISD::SDIV, MVT::v8i32, 8*20 }, - { ISD::SDIV, MVT::v4i64, 4*20 }, - { ISD::UDIV, MVT::v32i8, 32*20 }, - { ISD::UDIV, MVT::v16i16, 16*20 }, - { ISD::UDIV, MVT::v8i32, 8*20 }, - { ISD::UDIV, MVT::v4i64, 4*20 }, - }; - - // Look for AVX2 lowering tricks for custom cases. - if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD, - LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry - SSE2UniformCostTable[] = { + static const CostTblEntry SSE2UniformShiftCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v16i8, 1 }, // psllw. - { ISD::SHL, MVT::v32i8, 2 }, // psllw. - { ISD::SHL, MVT::v8i16, 1 }, // psllw. { ISD::SHL, MVT::v16i16, 2 }, // psllw. 
- { ISD::SHL, MVT::v4i32, 1 }, // pslld { ISD::SHL, MVT::v8i32, 2 }, // pslld - { ISD::SHL, MVT::v2i64, 1 }, // psllq. { ISD::SHL, MVT::v4i64, 2 }, // psllq. - { ISD::SRL, MVT::v16i8, 1 }, // psrlw. - { ISD::SRL, MVT::v32i8, 2 }, // psrlw. - { ISD::SRL, MVT::v8i16, 1 }, // psrlw. { ISD::SRL, MVT::v16i16, 2 }, // psrlw. - { ISD::SRL, MVT::v4i32, 1 }, // psrld. { ISD::SRL, MVT::v8i32, 2 }, // psrld. - { ISD::SRL, MVT::v2i64, 1 }, // psrlq. { ISD::SRL, MVT::v4i64, 2 }, // psrlq. - { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. - { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. - { ISD::SRA, MVT::v8i16, 1 }, // psraw. { ISD::SRA, MVT::v16i16, 2 }, // psraw. - { ISD::SRA, MVT::v4i32, 1 }, // psrad. { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. @@ -414,7 +398,7 @@ int X86TTIImpl::getArithmeticInstrCost( ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || (Op2Info == TargetTransformInfo::OK_UniformValue))) { if (const auto *Entry = - CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) + CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; } @@ -422,24 +406,98 @@ int X86TTIImpl::getArithmeticInstrCost( Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { MVT VT = LT.second; // Vector shift left by non uniform constant can be lowered - // into vector multiply (pmullw/pmulld). - if ((VT == MVT::v8i16 && ST->hasSSE2()) || - (VT == MVT::v4i32 && ST->hasSSE41())) - return LT.first; - - // v16i16 and v8i32 shifts by non-uniform constants are lowered into a - // sequence of extract + two vector multiply + insert. - if ((VT == MVT::v8i32 || VT == MVT::v16i16) && - (ST->hasAVX() && !ST->hasAVX2())) - ISD = ISD::MUL; - - // A vector shift left by non uniform constant is converted - // into a vector multiply; the new multiply is eventually - // lowered into a sequence of shuffles and 2 x pmuludq. - if (VT == MVT::v4i32 && ST->hasSSE2()) + // into vector multiply. + if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) || + ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX())) ISD = ISD::MUL; } + static const CostTblEntry AVX2CostTable[] = { + { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. + { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. + + { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence. + { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. + + { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. + { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. + { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. + + { ISD::SUB, MVT::v32i8, 1 }, // psubb + { ISD::ADD, MVT::v32i8, 1 }, // paddb + { ISD::SUB, MVT::v16i16, 1 }, // psubw + { ISD::ADD, MVT::v16i16, 1 }, // paddw + { ISD::SUB, MVT::v8i32, 1 }, // psubd + { ISD::ADD, MVT::v8i32, 1 }, // paddd + { ISD::SUB, MVT::v4i64, 1 }, // psubq + { ISD::ADD, MVT::v4i64, 1 }, // paddq + + { ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence. 
+ { ISD::MUL, MVT::v16i16, 1 }, // pmullw + { ISD::MUL, MVT::v8i32, 1 }, // pmulld + { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add + + { ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ + { ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/ + { ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/ + }; + + // Look for AVX2 lowering tricks for custom cases. + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v16i16, 4 }, + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v32i8, 4 }, + { ISD::ADD, MVT::v32i8, 4 }, + { ISD::SUB, MVT::v16i16, 4 }, + { ISD::ADD, MVT::v16i16, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + + // A v4i64 multiply is custom lowered as two split v2i64 vectors that then + // are lowered as a series of long multiplies(3), shifts(3) and adds(2) + // Because we believe v4i64 to be a legal type, we must also include the + // extract+insert in the cost table. Therefore, the cost here is 18 + // instead of 8. + { ISD::MUL, MVT::v4i64, 18 }, + + { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence. + + { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ + { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/ + { ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/ + + // Vectorizing division is a bad idea. See the SSE2 table for more comments. + { ISD::SDIV, MVT::v32i8, 32*20 }, + { ISD::SDIV, MVT::v16i16, 16*20 }, + { ISD::SDIV, MVT::v8i32, 8*20 }, + { ISD::SDIV, MVT::v4i64, 4*20 }, + { ISD::UDIV, MVT::v32i8, 32*20 }, + { ISD::UDIV, MVT::v16i16, 16*20 }, + { ISD::UDIV, MVT::v8i32, 8*20 }, + { ISD::UDIV, MVT::v4i64, 4*20 }, + }; + + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + static const CostTblEntry SSE42CostTable[] = { { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/ @@ -456,6 +514,8 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence. { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence. + { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld + { ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence. @@ -501,6 +561,7 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. 
+ { ISD::MUL, MVT::v8i16, 1 }, // pmullw { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add @@ -516,46 +577,19 @@ int X86TTIImpl::getArithmeticInstrCost( // generally a bad idea. Assume somewhat arbitrarily that we have to be able // to hide "20 cycles" for each lane. { ISD::SDIV, MVT::v16i8, 16*20 }, - { ISD::SDIV, MVT::v8i16, 8*20 }, - { ISD::SDIV, MVT::v4i32, 4*20 }, - { ISD::SDIV, MVT::v2i64, 2*20 }, + { ISD::SDIV, MVT::v8i16, 8*20 }, + { ISD::SDIV, MVT::v4i32, 4*20 }, + { ISD::SDIV, MVT::v2i64, 2*20 }, { ISD::UDIV, MVT::v16i8, 16*20 }, - { ISD::UDIV, MVT::v8i16, 8*20 }, - { ISD::UDIV, MVT::v4i32, 4*20 }, - { ISD::UDIV, MVT::v2i64, 2*20 }, + { ISD::UDIV, MVT::v8i16, 8*20 }, + { ISD::UDIV, MVT::v4i32, 4*20 }, + { ISD::UDIV, MVT::v2i64, 2*20 }, }; if (ST->hasSSE2()) if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v16i16, 4 }, - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v32i8, 4 }, - { ISD::ADD, MVT::v32i8, 4 }, - { ISD::SUB, MVT::v16i16, 4 }, - { ISD::ADD, MVT::v16i16, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - // A v4i64 multiply is custom lowered as two split v2i64 vectors that then - // are lowered as a series of long multiplies(3), shifts(3) and adds(2) - // Because we believe v4i64 to be a legal type, we must also include the - // extract+insert in the cost table. Therefore, the cost here is 18 - // instead of 8. - { ISD::MUL, MVT::v4i64, 18 }, - }; - - // Look for AVX1 lowering tricks. 
-  if (ST->hasAVX() && !ST->hasAVX2())
-    if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
-      return LT.first * Entry->Cost;
-
   static const CostTblEntry SSE1CostTable[] = {
     { ISD::FDIV, MVT::f32,   17 }, // Pentium III from http://www.agner.org/
     { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
@@ -639,8 +673,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
     { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
-    { TTI::SK_Reverse, MVT::v64i8,  6 }, // vextracti64x4 + 2*vperm2i128
-                                         // + 2*pshufb + vinserti64x4
+    { TTI::SK_Reverse, MVT::v64i8,  2 }, // pshufb + vshufi64x2
     { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
     { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index f4742aaf748f..82daf754be0d 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -42,6 +42,8 @@ using namespace llvm;
 using namespace lowertypetests;
 
+using SummaryAction = LowerTypeTestsSummaryAction;
+
 #define DEBUG_TYPE "lowertypetests"
 
 STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@@ -55,9 +57,15 @@ static cl::opt<bool> AvoidReuse(
     cl::desc("Try to avoid reuse of byte array addresses using aliases"),
     cl::Hidden, cl::init(true));
 
-static cl::opt<std::string> ClSummaryAction(
+static cl::opt<SummaryAction> ClSummaryAction(
     "lowertypetests-summary-action",
-    cl::desc("What to do with the summary when running this pass"), cl::Hidden);
+    cl::desc("What to do with the summary when running this pass"),
+    cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
+               clEnumValN(SummaryAction::Import, "import",
+                          "Import typeid resolutions from summary and globals"),
+               clEnumValN(SummaryAction::Export, "export",
+                          "Export typeid resolutions to summary and globals")),
+    cl::Hidden);
 
 static cl::opt<std::string> ClReadSummary(
     "lowertypetests-read-summary",
@@ -226,8 +234,8 @@ public:
 class LowerTypeTestsModule {
   Module &M;
 
-  // This is for testing purposes only.
-  std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
+  SummaryAction Action;
+  ModuleSummaryIndex *Summary;
 
   bool LinkerSubsectionsViaSymbols;
   Triple::ArchType Arch;
@@ -319,21 +327,38 @@ class LowerTypeTestsModule {
   void createJumpTable(Function *F, ArrayRef<Function *> Functions);
 
 public:
-  LowerTypeTestsModule(Module &M);
-  ~LowerTypeTestsModule();
+  LowerTypeTestsModule(Module &M, SummaryAction Action,
+                       ModuleSummaryIndex *Summary);
+
   bool lower();
+
+  // Lower the module using the action and summary passed as command line
+  // arguments. For testing purposes only.
+ static bool runForTesting(Module &M); }; struct LowerTypeTests : public ModulePass { static char ID; - LowerTypeTests() : ModulePass(ID) { + + bool UseCommandLine = false; + + SummaryAction Action; + ModuleSummaryIndex *Summary; + + LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { + initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); + } + + LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary) + : ModulePass(ID), Action(Action), Summary(Summary) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; - return LowerTypeTestsModule(M).lower(); + if (UseCommandLine) + return LowerTypeTestsModule::runForTesting(M); + return LowerTypeTestsModule(M, Action, Summary).lower(); } }; @@ -343,7 +368,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, false) char LowerTypeTests::ID = 0; -ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; } +ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action, + ModuleSummaryIndex *Summary) { + return new LowerTypeTests(Action, Summary); +} /// Build a bit set for TypeId using the object layouts in /// GlobalLayout. @@ -1145,22 +1173,12 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( } /// Lower all type tests in this module. -LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) { - // Handle the command-line summary arguments. This code is for testing - // purposes only, so we handle errors directly. - if (!ClSummaryAction.empty()) { - OwnedSummary = make_unique(); - if (!ClReadSummary.empty()) { - ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary + - ": "); - auto ReadSummaryFile = - ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary))); - - yaml::Input In(ReadSummaryFile->getBuffer()); - In >> *OwnedSummary; - ExitOnErr(errorCodeToError(In.error())); - } - } +LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action, + ModuleSummaryIndex *Summary) + : M(M), Action(Action), Summary(Summary) { + // FIXME: Use these fields. + (void)this->Action; + (void)this->Summary; Triple TargetTriple(M.getTargetTriple()); LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); @@ -1169,18 +1187,36 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) { ObjectFormat = TargetTriple.getObjectFormat(); } -LowerTypeTestsModule::~LowerTypeTestsModule() { - if (ClSummaryAction.empty() || ClWriteSummary.empty()) - return; +bool LowerTypeTestsModule::runForTesting(Module &M) { + ModuleSummaryIndex Summary; - ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + - ": "); - std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); - ExitOnErr(errorCodeToError(EC)); + // Handle the command-line summary arguments. This code is for testing + // purposes only, so we handle errors directly. 
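As an aside for readers unfamiliar with the option machinery that this testing path relies on: the enum-valued cl::opt pattern used earlier for -lowertypetests-summary-action generalizes to any pass-local enum. A minimal self-contained sketch, with invented option and enum names (not part of this patch):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    enum class SummaryMode { None, Import, Export };

    // Each clEnumValN entry maps an enumerator to a flag spelling and help
    // text; cl::init picks the default when the flag is absent.
    static cl::opt<SummaryMode> ExampleMode(
        "example-summary-mode", cl::desc("What to do with the summary"),
        cl::values(clEnumValN(SummaryMode::None, "none", "Do nothing"),
                   clEnumValN(SummaryMode::Import, "import", "Import resolutions"),
                   clEnumValN(SummaryMode::Export, "export", "Export resolutions")),
        cl::init(SummaryMode::None));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return ExampleMode == SummaryMode::Export ? 2 : 0;
    }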
+  if (!ClReadSummary.empty()) {
+    ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
+                          ": ");
+    auto ReadSummaryFile =
+        ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+    yaml::Input In(ReadSummaryFile->getBuffer());
+    In >> Summary;
+    ExitOnErr(errorCodeToError(In.error()));
+  }
+
+  bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();
+
+  if (!ClWriteSummary.empty()) {
+    ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
+                          ": ");
+    std::error_code EC;
+    raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+    ExitOnErr(errorCodeToError(EC));
+
+    yaml::Output Out(OS);
+    Out << Summary;
+  }
 
-  yaml::Output Out(OS);
-  Out << *OwnedSummary;
+  return Changed;
 }
 
 bool LowerTypeTestsModule::lower() {
@@ -1313,7 +1349,8 @@ bool LowerTypeTestsModule::lower() {
 
 PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                           ModuleAnalysisManager &AM) {
-  bool Changed = LowerTypeTestsModule(M).lower();
+  bool Changed =
+      LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
   if (!Changed)
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 293ddf21a68f..d086ee05a64f 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -857,7 +857,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
   // Lower type metadata and the type.test intrinsic. This pass supports Clang's
   // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
   // link time if CFI is enabled. The pass does nothing if CFI is disabled.
-  PM.add(createLowerTypeTestsPass());
+  PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
+                                  /*Summary=*/nullptr));
 
   if (OptLevel != 0)
     addLateLTOOptimizationPasses(PM);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 012bfc7b4944..013159cde774 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1903,7 +1903,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
     return foldICmpShlOne(Cmp, Shl, C);
 
   // Check that the shift amount is in range. If not, don't perform undefined
-  // shifts. When the shift is visited it will be simplified.
+  // shifts. When the shift is visited, it will be simplified.
   unsigned TypeBits = C->getBitWidth();
   if (ShiftAmt->uge(TypeBits))
     return nullptr;
@@ -1923,7 +1923,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
     return new ICmpInst(Pred, X, LShrC);
 
   if (Shl->hasOneUse()) {
-    // Otherwise strength reduce the shift into an and.
+    // Otherwise, strength reduce the shift into an and.
     Constant *Mask = ConstantInt::get(Shl->getType(),
         APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
@@ -1951,7 +1951,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
   }
 
   // When the shift is nuw and pred is >u or <=u, comparison only really happens
-  // in the pre-shifted bits. Since InstSimplify canoncalizes <=u into <u,
+  // in the pre-shifted bits. Since InstSimplify canonicalizes <=u into <u,
   if (Shl->hasNoUnsignedWrap() &&
       (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) {
@@ -1970,9 +1970,9 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
   // Transform (icmp pred iM (shl iM %v, N), C)
   // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
   // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
- // This enables us to get rid of the shift in favor of a trunc which can be + // This enables us to get rid of the shift in favor of a trunc that may be // free on the target. It has the additional benefit of comparing to a - // smaller constant, which will be target friendly. + // smaller constant that may be more target-friendly. unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1); if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt && DL.isLegalInteger(TypeBits - Amt)) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 1d5528398776..54bdc9e0772b 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1818,6 +1818,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { RegisteredFlag = new GlobalVariable( M, IntptrTy, false, GlobalVariable::CommonLinkage, ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); // Update llvm.compiler.used, adding the new liveness globals. This is // needed so that during LTO these variables stay alive. The alternative diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6aeb5237ffe3..68faa886060a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1423,7 +1423,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { if (widenLoopCompare(DU)) return nullptr; - // This user does not evaluate to a recurence after widening, so don't + // This user does not evaluate to a recurrence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. truncateIVUse(DU, DT, LI); diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 08e7acdaaf72..8fb580183e30 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -415,7 +415,9 @@ public: Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(), PH->getTerminator()); Value *Initial = - new LoadInst(InitialPtr, "load_initial", PH->getTerminator()); + new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false, + Cand.Load->getAlignment(), PH->getTerminator()); + PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded", &L->getHeader()->front()); PHI->addIncoming(Initial, PH); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 6f7682c96cef..76fe91884c7b 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1382,8 +1382,8 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), Succ->begin(), Succ->end()); LPM->deleteSimpleAnalysisValue(BI, L); - BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); + BI->eraseFromParent(); // Remove Succ from the loop tree. 
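The LoopUnswitch hunk just above is a use-after-free fix rather than a cosmetic swap: eraseFromParent() frees the instruction, so the worklist has to be purged while the pointer is still valid. A contrived stand-alone illustration of the ordering rule, using plain containers instead of the pass's real types:

    #include <algorithm>
    #include <vector>

    struct Inst {}; // stand-in for llvm::Instruction

    static void removeFromWorklist(Inst *I, std::vector<Inst *> &Worklist) {
      Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
                     Worklist.end());
    }

    void eraseInstruction(Inst *I, std::vector<Inst *> &Worklist) {
      // Correct order: drop every worklist reference while I is still valid.
      removeFromWorklist(I, Worklist);
      delete I; // plays the role of BI->eraseFromParent()
      // Doing the delete first would make removeFromWorklist compare a stale
      // pointer value, which sanitizers rightly flag.
    }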
LI->removeBlock(Succ); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 8b8236390bf4..eef7db08cd46 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -79,7 +79,8 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); -STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); +STATISTIC(NumGVNMaxIterations, + "Maximum Number of iterations it took to converge GVN"); //===----------------------------------------------------------------------===// // GVN Pass @@ -327,7 +328,7 @@ private: // Elimination. struct ValueDFS; void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, - std::vector &); + SmallVectorImpl &); bool eliminateInstructions(Function &); void replaceInstruction(Instruction *, Value *); @@ -336,8 +337,11 @@ private: // New instruction creation. void handleNewInstruction(Instruction *){}; + + // Various instruction touch utilities void markUsersTouched(Value *); void markMemoryUsersTouched(MemoryAccess *); + void markLeaderChangeTouched(CongruenceClass *CC); // Utilities. void cleanupTables(); @@ -390,10 +394,10 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false) PHIExpression *NewGVN::createPHIExpression(Instruction *I) { - BasicBlock *PhiBlock = I->getParent(); + BasicBlock *PHIBlock = I->getParent(); auto *PN = cast(I); - auto *E = new (ExpressionAllocator) - PHIExpression(PN->getNumOperands(), I->getParent()); + auto *E = + new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); E->allocateOperands(ArgRecycler, ExpressionAllocator); E->setType(I->getType()); @@ -408,10 +412,10 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) { std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use &U) -> Value * { - // Don't try to transform self-defined phis + // Don't try to transform self-defined phis. if (U == PN) return PN; - const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock); + const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock); return lookupOperandLeader(U, I, BBE); }); return E; @@ -710,6 +714,15 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, return E; } +// Utility function to check whether the congruence class has a member other +// than the given instruction. +bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) { + // Either it has more than one member, in which case it must contain something + // other than us (because it's indexed by value), or if it only has one member + // right now, that member should not be us. + return CC->Members.size() > 1 || CC->Members.count(I) == 0; +} + const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, const BasicBlock *B) { // Unlike loads, we never try to eliminate stores, so we do not check if they @@ -725,8 +738,12 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, cast(StoreAccess)->getDefiningAccess()); const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); + // Basically, check if the congruence class the store is in is defined by a + // store that isn't us, and has the same value. 
MemorySSA takes care of
+  // ensuring the store has the same memory state as us already.
   if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
-      CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
+      CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
+      hasMemberOtherThanUs(CC, I))
     return createStoreExpression(SI, StoreRHS, B);
   }
 
@@ -810,36 +827,50 @@ bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
 const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
                                                        const BasicBlock *B) {
   auto *E = cast<PHIExpression>(createPHIExpression(I));
-  if (E->op_empty()) {
+  // We match the semantics of SimplifyPhiNode from InstructionSimplify here.
+
+  // See if all arguments are the same.
+  // We track if any were undef because they need special handling.
+  bool HasUndef = false;
+  auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
+    if (Arg == I)
+      return false;
+    if (isa<UndefValue>(Arg)) {
+      HasUndef = true;
+      return false;
+    }
+    return true;
+  });
+  // If we are left with no operands, it's undef.
+  if (Filtered.begin() == Filtered.end()) {
     DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
                  << "\n");
     E->deallocateOperands(ArgRecycler);
     ExpressionAllocator.Deallocate(E);
     return createConstantExpression(UndefValue::get(I->getType()));
   }
-
-  Value *AllSameValue = E->getOperand(0);
-
-  // See if all arguments are the same, ignoring undef arguments, because we can
-  // choose a value that is the same for them.
-  for (const Value *Arg : E->operands())
-    if (Arg != AllSameValue && !isa<UndefValue>(Arg)) {
-      AllSameValue = nullptr;
-      break;
+  Value *AllSameValue = *(Filtered.begin());
+  ++Filtered.begin();
+  // Can't use std::equal here, sadly, because filter.begin moves.
+  if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
+        return V == AllSameValue;
+      })) {
+    // In LLVM's non-standard representation of phi nodes, it's possible to have
+    // phi nodes with cycles (i.e., dependent on other phis that are .... dependent
+    // on the original phi node), especially in weird CFGs where some arguments
+    // are unreachable, or uninitialized along certain paths. This can cause
+    // infinite loops during evaluation. We work around this by not trying to
+    // really evaluate them independently, but instead using a variable
+    // expression to say if one is equivalent to the other.
+    // We also special case undef, so that if we have an undef, we can't use the
+    // common value unless it dominates the phi block.
+    if (HasUndef) {
+      // Only have to check for instructions.
+      if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
+        if (!DT->dominates(AllSameInst, I))
+          return E;
     }
-  if (AllSameValue) {
-    // It's possible to have phi nodes with cycles (IE dependent on
-    // other phis that are .... dependent on the original phi node),
-    // especially in weird CFG's where some arguments are unreachable, or
-    // uninitialized along certain paths.
-    // This can cause infinite loops during evaluation (even if you disable
-    // the recursion below, you will simply ping-pong between congruence
-    // classes). If a phi node symbolically evaluates to another phi node,
-    // just leave it alone. If they are really the same, we will still
- if (isa(AllSameValue)) - return E; NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); @@ -1007,12 +1038,22 @@ void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) { } } +// Touch the instructions that need to be updated after a congruence class has a +// leader change, and mark changed values. +void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) { + for (auto M : CC->Members) { + if (auto *I = dyn_cast(M)) + TouchedInstructions.set(InstrDFS[I]); + ChangedValues.insert(M); + } +} + // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { - ValueToExpression[V] = E; // This is guaranteed to return something, since it will at least find // INITIAL. + CongruenceClass *VClass = ValueToClass[V]; assert(VClass && "Should have found a vclass"); // Dead classes should have been eliminated from the mapping. @@ -1031,14 +1072,17 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { place->second = NewClass; // Constants and variables should always be made the leader. - if (const auto *CE = dyn_cast(E)) + if (const auto *CE = dyn_cast(E)) { NewClass->RepLeader = CE->getConstantValue(); - else if (const auto *VE = dyn_cast(E)) - NewClass->RepLeader = VE->getVariableValue(); - else if (const auto *SE = dyn_cast(E)) - NewClass->RepLeader = SE->getStoreInst()->getValueOperand(); - else + } else if (const auto *SE = dyn_cast(E)) { + StoreInst *SI = SE->getStoreInst(); + NewClass->RepLeader = + lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + } else { NewClass->RepLeader = V; + } + assert(!isa(E) && + "VariableExpression should have been handled already"); EClass = NewClass; DEBUG(dbgs() << "Created new congruence class for " << *V @@ -1077,14 +1121,11 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { ExpressionToClass.erase(VClass->DefiningExpr); } } else if (VClass->RepLeader == V) { - // FIXME: When the leader changes, the value numbering of - // everything may change, so we need to reprocess. + // When the leader changes, the value numbering of + // everything may change due to symbolization changes, so we need to + // reprocess. VClass->RepLeader = *(VClass->Members.begin()); - for (auto M : VClass->Members) { - if (auto *I = dyn_cast(M)) - TouchedInstructions.set(InstrDFS[I]); - ChangedValues.insert(M); - } + markLeaderChangeTouched(VClass); } } @@ -1106,6 +1147,27 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { markMemoryUsersTouched(MA); } } + } else if (StoreInst *SI = dyn_cast(V)) { + // There is, sadly, one complicating thing for stores. Stores do not + // produce values, only consume them. However, in order to make loads and + // stores value number the same, we ignore the value operand of the store. + // But the value operand will still be the leader of our class, and thus, it + // may change. Because the store is a use, the store will get reprocessed, + // but nothing will change about it, and so nothing above will catch it + // (since the class will not change). In order to make sure everything ends + // up okay, we need to recheck the leader of the class. Since stores of + // different values value number differently due to different memorydefs, we + // are guaranteed the leader is always the same between stores in the same + // class. 
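The rewritten performSymbolicPHIEvaluation above leans on make_filter_range and all_of to skip self-references and undefs lazily rather than copying operands. A minimal self-contained illustration of that filtered-range idiom, over plain ints instead of PHI operands:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    // Returns true when every nonzero element equals the first nonzero one,
    // mirroring the "all the same, ignoring undefs" walk above.
    bool allNonZeroEqual(const std::vector<int> &Vals) {
      auto Filtered =
          llvm::make_filter_range(Vals, [](int V) { return V != 0; });
      if (Filtered.begin() == Filtered.end())
        return true; // nothing survived the filter
      int First = *Filtered.begin();
      return llvm::all_of(Filtered, [First](int V) { return V == First; });
    }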
+ DEBUG(dbgs() << "Checking store leader\n"); + auto ProperLeader = + lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent()); + if (EClass->RepLeader != ProperLeader) { + DEBUG(dbgs() << "Store leader changed, fixing\n"); + EClass->RepLeader = ProperLeader; + markLeaderChangeTouched(EClass); + markMemoryUsersTouched(MSSA->getMemoryAccess(SI)); + } } } @@ -1708,8 +1770,9 @@ struct NewGVN::ValueDFS { } }; -void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense, - std::vector &DFSOrderedSet) { +void NewGVN::convertDenseToDFSOrdered( + CongruenceClass::MemberSet &Dense, + SmallVectorImpl &DFSOrderedSet) { for (auto D : Dense) { // First add the value. BasicBlock *BB = getBlockForValue(D); @@ -1972,21 +2035,25 @@ bool NewGVN::eliminateInstructions(Function &F) { ValueDFSStack EliminationStack; // Convert the members to DFS ordered sets and then merge them. - std::vector DFSOrderedSet; + SmallVector DFSOrderedSet; convertDenseToDFSOrdered(CC->Members, DFSOrderedSet); // Sort the whole thing. - sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); - - for (auto &C : DFSOrderedSet) { - int MemberDFSIn = C.DFSIn; - int MemberDFSOut = C.DFSOut; - Value *Member = C.Val; - Use *MemberUse = C.U; - - // We ignore void things because we can't get a value from them. - if (Member && Member->getType()->isVoidTy()) - continue; + std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); + + for (auto &VD : DFSOrderedSet) { + int MemberDFSIn = VD.DFSIn; + int MemberDFSOut = VD.DFSOut; + Value *Member = VD.Val; + Use *MemberUse = VD.U; + + if (Member) { + // We ignore void things because we can't get a value from them. + // FIXME: We could actually use this to kill dead stores that are + // dominated by equivalent earlier stores. + if (Member->getType()->isVoidTy()) + continue; + } if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -1995,8 +2062,6 @@ bool NewGVN::eliminateInstructions(Function &F) { << EliminationStack.dfs_back().first << "," << EliminationStack.dfs_back().second << ")\n"); } - if (Member && isa(Member)) - assert(isa(CC->RepLeader)); DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," << MemberDFSOut << ")\n"); @@ -2037,11 +2102,8 @@ bool NewGVN::eliminateInstructions(Function &F) { continue; Value *Result = EliminationStack.back(); - // Don't replace our existing users with ourselves, and don't replace - // phi node arguments with the result of the same phi node. - // IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef) - if (MemberUse->get() == Result || - (isa(Result) && MemberUse->getUser() == Result)) + // Don't replace our existing users with ourselves. 
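The convertDenseToDFSOrdered signature change above (std::vector to SmallVectorImpl) follows a common LLVM convention worth spelling out: the callee takes SmallVectorImpl<T>& so each caller can choose its own inline size without templating the callee. A minimal sketch of the convention (illustrative functions, not from the patch):

    #include "llvm/ADT/SmallVector.h"

    // Callee: accepts any SmallVector<unsigned, N> without being a template.
    static void collectSquares(unsigned N, llvm::SmallVectorImpl<unsigned> &Out) {
      for (unsigned I = 0; I != N; ++I)
        Out.push_back(I * I);
    }

    unsigned sumOfSquares(unsigned N) {
      llvm::SmallVector<unsigned, 16> Squares; // caller picks the inline size
      collectSquares(N, Squares);
      unsigned Sum = 0;
      for (unsigned S : Squares)
        Sum += S;
      return Sum;
    }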
+ if (MemberUse->get() == Result) continue; DEBUG(dbgs() << "Found replacement " << *Result << " for " diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 8a6be97d08c7..34be90692481 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -511,9 +511,6 @@ private: void visitSelectInst(SelectInst &I); void visitBinaryOperator(Instruction &I); void visitCmpInst(CmpInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitInsertElementInst(InsertElementInst &I); - void visitShuffleVectorInst(ShuffleVectorInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); } @@ -970,21 +967,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { markOverdefined(&I); } -void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) { - // TODO : SCCP does not handle vectors properly. - return markOverdefined(&I); -} - -void SCCPSolver::visitInsertElementInst(InsertElementInst &I) { - // TODO : SCCP does not handle vectors properly. - return markOverdefined(&I); -} - -void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) { - // TODO : SCCP does not handle vectors properly. - return markOverdefined(&I); -} - // Handle getelementptr instructions. If all operands are constants then we // can turn this into a getelementptr ConstantExpr. // diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index 678d02e05d42..9844190ef84a 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -67,12 +67,15 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( return true; } - // When exporting, consult the index. - auto Summaries = ImportIndex.findGlobalValueSummaryList(SGV->getGUID()); - assert(Summaries != ImportIndex.end() && - "Missing summary for global value when exporting"); - assert(Summaries->second.size() == 1 && "Local has more than one summary"); - auto Linkage = Summaries->second.front()->linkage(); + // When exporting, consult the index. We can have more than one local + // with the same GUID, in the case of same-named locals in different but + // same-named source files that were compiled in their respective directories + // (so the source file name and resulting GUID is the same). Find the one + // in this module. 
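To make the collision described in that comment concrete: the GUID for a local is a hash of an identifier that embeds the source file name but not its directory. The helper below is hypothetical and the exact identifier format is owned by GlobalValue::getGlobalIdentifier, so treat this only as a sketch of the shape of the problem:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/GlobalValue.h"

    // Hypothetical helper, shown only to make the collision concrete.
    llvm::GlobalValue::GUID guidForLocal(llvm::StringRef SourceFileName,
                                         llvm::StringRef Name) {
      // "util.c:helper" hashes the same whether it came from dir1/util.c or
      // dir2/util.c, because the directory is not part of the key.
      return llvm::GlobalValue::getGUID(
          (llvm::Twine(SourceFileName) + ":" + Name).str());
    }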
+ auto Summary = ImportIndex.findSummaryInModule( + SGV->getGUID(), SGV->getParent()->getModuleIdentifier()); + assert(Summary && "Missing summary for global value when exporting"); + auto Linkage = Summary->linkage(); if (!GlobalValue::isLocalLinkage(Linkage)) { assert(!isNonRenamableLocal(*SGV) && "Attempting to promote non-renamable local"); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index c8f030f7eb83..11d54bcf4f89 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1189,19 +1189,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; StringRef Name = Callee->getName(); if (Name == "fabs" && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, false); + return optimizeUnaryDoubleFP(CI, B, false); - Value *Op = CI->getArgOperand(0); - if (Instruction *I = dyn_cast(Op)) { - // Fold fabs(x * x) -> x * x; any squared FP value must already be positive. - if (I->getOpcode() == Instruction::FMul) - if (I->getOperand(0) == I->getOperand(1)) - return Op; - } - return Ret; + return nullptr; } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 31daba2248aa..578c65daf7c0 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -783,6 +783,10 @@ protected: // Similarly, we create a new latch condition when setting up the structure // of the new loop, so the old one can become dead. SmallPtrSet DeadInstructions; + + // Holds the end values for each induction variable. We save the end values + // so we can later fix-up the external users of the induction variables. + DenseMap IVEndValues; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -1879,13 +1883,6 @@ public: unsigned selectInterleaveCount(bool OptForSize, unsigned VF, unsigned LoopCost); - /// \return The most profitable unroll factor. - /// This method finds the best unroll-factor based on register pressure and - /// other parameters. VF and LoopCost are the selected vectorization factor - /// and the cost of the selected VF. - unsigned computeInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); - /// \brief A struct that represents some properties of the register usage /// of a loop. struct RegisterUsage { @@ -3424,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() { // Create phi nodes to merge from the backedge-taken check block. PHINode *BCResumeVal = PHINode::Create( OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator()); - Value *EndValue; + Value *&EndValue = IVEndValues[OrigPhi]; if (OrigPhi == OldInduction) { // We know what the end value is. EndValue = CountRoundDown; @@ -3443,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() { // or the value at the end of the vectorized loop. BCResumeVal->addIncoming(EndValue, MiddleBlock); - // Fix up external users of the induction variable. - fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock); - // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); @@ -4116,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() { Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } // end of for each Phi in PHIsToFix. 
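One detail worth pausing on in the LoopVectorize change above: Value *&EndValue = IVEndValues[OrigPhi] binds a reference to the slot that DenseMap::operator[] creates, so the pre-existing assignments to EndValue now populate the map as a side effect. A tiny stand-alone model of the idiom (illustrative types, not the vectorizer's):

    #include "llvm/ADT/DenseMap.h"

    int recordEndValue(llvm::DenseMap<int, int> &EndValues, bool Known) {
      // operator[] default-constructs the slot; End aliases it from here on.
      int &End = EndValues[42];
      if (Known)
        End = 7;  // assignment lands directly in the map
      else
        End = -1;
      // Caveat: such a reference is only safe until the next insertion, since
      // a DenseMap rehash moves storage; the vectorizer consumes it promptly.
      return EndValues.lookup(42);
    }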
- fixLCSSAPHIs(); - - // Make sure DomTree is updated. + // Update the dominator tree. + // + // FIXME: After creating the structure of the new loop, the dominator tree is + // no longer up-to-date, and it remains that way until we update it + // here. An out-of-date dominator tree is problematic for SCEV, + // because SCEVExpander uses it to guide code generation. The + // vectorizer use SCEVExpanders in several places. Instead, we should + // keep the dominator tree up-to-date as we go. updateAnalysis(); + // Fix-up external users of the induction variables. + for (auto &Entry : *Legal->getInductionVars()) + fixupIVUsers(Entry.first, Entry.second, + getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)), + IVEndValues[Entry.first], LoopMiddleBlock); + + fixLCSSAPHIs(); predicateInstructions(); // Remove redundant induction instructions. diff --git a/test/Analysis/CostModel/X86/shuffle-reverse.ll b/test/Analysis/CostModel/X86/shuffle-reverse.ll index a1bdda0690aa..627d79857434 100644 --- a/test/Analysis/CostModel/X86/shuffle-reverse.ll +++ b/test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -161,7 +161,7 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) ; AVX1: cost of 8 {{.*}} %V512 = shufflevector ; AVX2: cost of 4 {{.*}} %V512 = shufflevector ; AVX512F: cost of 4 {{.*}} %V512 = shufflevector - ; AVX512BW: cost of 6 {{.*}} %V512 = shufflevector + ; AVX512BW: cost of 2 {{.*}} %V512 = shufflevector %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ret void diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll index 52f176fe4d63..e5fff9b5e4da 100644 --- a/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -498,7 +498,7 @@ entry: define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) { entry: ; SSE2: shift16i8c - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN: shift16i8c ; SSE2-CODEGEN: psrlw $3 @@ -513,7 +513,7 @@ entry: define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) { entry: ; SSE2: shift32i8c - ; SSE2: cost of 2 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift32i8c ; SSE2-CODEGEN: psrlw $3 diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll index e385c5bfeeac..6628b9b87986 100644 --- a/test/Analysis/CostModel/X86/testshiftshl.ll +++ b/test/Analysis/CostModel/X86/testshiftshl.ll @@ -498,7 +498,7 @@ entry: define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) { entry: ; SSE2: shift16i8c - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN: shift16i8c ; SSE2-CODEGEN: psllw $3 @@ -513,7 +513,7 @@ entry: define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) { entry: ; SSE2: shift32i8c - ; SSE2: cost of 2 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: shift32i8c ; SSE2-CODEGEN: psllw $3 diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index 888164df75f5..6756f3ba2802 100644 --- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -120,7 +120,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; 
AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = ashr <32 x i16> %a, %b ret <32 x i16> %shift @@ -282,7 +282,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer %shift = ashr <32 x i16> %a, %splat @@ -439,7 +439,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = ashr <32 x i16> %a, ret <32 x i16> %shift @@ -529,8 +529,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift -; XOPAVX2: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = ashr <4 x i32> %a, ret <4 x i32> %shift } @@ -568,7 +567,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i16> %a, ret <8 x i16> %shift } @@ -578,9 +577,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 10 for instruction: %shift -; AVX512: Found an estimated cost of 10 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } @@ -590,10 +590,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 20 for instruction: %shift -; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for 
instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <32 x i16> %a, ret <32 x i16> %shift } @@ -605,7 +606,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i8> %a, ret <16 x i8> %shift } @@ -615,9 +616,10 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift ; AVX: Found an estimated cost of 8 for instruction: %shift -; AVX2: Found an estimated cost of 24 for instruction: %shift -; AVX512: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } @@ -627,10 +629,11 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift ; AVX: Found an estimated cost of 16 for instruction: %shift -; AVX2: Found an estimated cost of 48 for instruction: %shift -; AVX512F: Found an estimated cost of 48 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX512F: Found an estimated cost of 8 for instruction: %shift +; AVX512BW: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 16 for instruction: %shift +; XOPAVX2: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, ret <64 x i8> %shift } diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index b3382253739f..63e6db194d52 100644 --- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -123,7 +123,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = lshr <32 x i16> %a, %b ret <32 x i16> %shift @@ -287,7 +287,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; 
AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer %shift = lshr <32 x i16> %a, %splat @@ -447,7 +447,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 8 for instruction: %shift %shift = lshr <32 x i16> %a, ret <32 x i16> %shift @@ -501,8 +501,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift -; XOPAVX2: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = lshr <2 x i64> %a, ret <2 x i64> %shift } @@ -540,8 +539,7 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift -; XOPAVX2: Found an estimated cost of 1 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i32> %a, ret <4 x i32> %shift } @@ -579,7 +577,7 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; AVX: Found an estimated cost of 1 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i16> %a, ret <8 x i16> %shift } @@ -589,9 +587,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 10 for instruction: %shift -; AVX512: Found an estimated cost of 10 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } @@ -601,21 +600,22 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 20 for instruction: %shift -; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; 
AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': -; SSE2: Found an estimated cost of 1 for instruction: %shift -; SSE41: Found an estimated cost of 1 for instruction: %shift -; AVX: Found an estimated cost of 1 for instruction: %shift -; AVX2: Found an estimated cost of 1 for instruction: %shift -; AVX512: Found an estimated cost of 1 for instruction: %shift +; SSE2: Found an estimated cost of 2 for instruction: %shift +; SSE41: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 2 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, ret <16 x i8> %shift @@ -623,25 +623,27 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': -; SSE2: Found an estimated cost of 2 for instruction: %shift -; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 11 for instruction: %shift -; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; SSE2: Found an estimated cost of 4 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': -; SSE2: Found an estimated cost of 4 for instruction: %shift -; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 22 for instruction: %shift -; AVX512F: Found an estimated cost of 22 for instruction: %shift +; SSE2: Found an estimated cost of 8 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX2: Found an estimated cost of 4 for instruction: %shift +; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = lshr <64 x i8> %a, ret <64 x i8> %shift } diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 804c5a76c319..8c42bd66c707 100644 --- 
a/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -57,8 +57,8 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32': ; SSE2: Found an estimated cost of 10 for instruction: %shift -; SSE41: Found an estimated cost of 10 for instruction: %shift -; AVX: Found an estimated cost of 10 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift @@ -70,8 +70,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 20 for instruction: %shift -; SSE41: Found an estimated cost of 20 for instruction: %shift -; AVX: Found an estimated cost of 20 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift @@ -83,8 +83,8 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 40 for instruction: %shift -; SSE41: Found an estimated cost of 40 for instruction: %shift -; AVX: Found an estimated cost of 40 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift @@ -124,7 +124,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = shl <32 x i16> %a, %b ret <32 x i16> %shift @@ -216,8 +216,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32': ; SSE2: Found an estimated cost of 10 for instruction: %shift -; SSE41: Found an estimated cost of 10 for instruction: %shift -; AVX: Found an estimated cost of 10 for instruction: %shift +; SSE41: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 1 for instruction: %shift @@ -230,8 +230,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 
x i32> %b) { define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 20 for instruction: %shift -; SSE41: Found an estimated cost of 20 for instruction: %shift -; AVX: Found an estimated cost of 20 for instruction: %shift +; SSE41: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift @@ -244,8 +244,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': ; SSE2: Found an estimated cost of 40 for instruction: %shift -; SSE41: Found an estimated cost of 40 for instruction: %shift -; AVX: Found an estimated cost of 40 for instruction: %shift +; SSE41: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 16 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift @@ -288,7 +288,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX: Found an estimated cost of 56 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer %shift = shl <32 x i16> %a, %splat @@ -449,7 +449,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i16> %a, @@ -607,7 +607,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift -; AVX512BW: Found an estimated cost of 2 for instruction: %shift +; AVX512BW: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i16> %a, @@ -616,37 +616,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8': -; SSE2: Found an estimated cost of 1 for instruction: %shift -; SSE41: Found an estimated cost of 1 for instruction: %shift -; AVX: Found an estimated cost of 1 for instruction: %shift -; AVX2: Found an estimated cost of 1 for instruction: %shift -; AVX512: 
Found an estimated cost of 1 for instruction: %shift
-; XOP: Found an estimated cost of 1 for instruction: %shift
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
   %shift = shl <16 x i8> %a, 
   ret <16 x i8> %shift
 }
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
-; SSE2: Found an estimated cost of 2 for instruction: %shift
-; SSE41: Found an estimated cost of 2 for instruction: %shift
-; AVX: Found an estimated cost of 2 for instruction: %shift
-; AVX2: Found an estimated cost of 11 for instruction: %shift
-; AVX512: Found an estimated cost of 11 for instruction: %shift
-; XOP: Found an estimated cost of 2 for instruction: %shift
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
   %shift = shl <32 x i8> %a, 
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
-; SSE2: Found an estimated cost of 4 for instruction: %shift
-; SSE41: Found an estimated cost of 4 for instruction: %shift
-; AVX: Found an estimated cost of 4 for instruction: %shift
-; AVX2: Found an estimated cost of 22 for instruction: %shift
-; AVX512F: Found an estimated cost of 22 for instruction: %shift
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; AVX512F: Found an estimated cost of 4 for instruction: %shift
 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
-; XOP: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction: %shift
   %shift = shl <64 x i8> %a, 
   ret <64 x i8> %shift
 }
diff --git a/test/Analysis/ScalarEvolution/invalidation.ll b/test/Analysis/ScalarEvolution/invalidation.ll
new file mode 100644
index 000000000000..1fcaddb525e6
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/invalidation.ll
@@ -0,0 +1,70 @@
+; Test that SCEV gets invalidated when one of its dependencies is invalidated.
+;
+; Each of the RUNs checks that the pass manager runs SCEV, then invalidates it
+; due to a dependency being invalidated, and then re-runs it. This will
+; directly fail and indicates a failure that would occur later if we didn't
+; invalidate SCEV in this way.
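For background on what the RUN lines in this new test exercise: under the new pass manager, an analysis result that caches pointers into other analyses is expected to report itself stale when any of those is invalidated. A sketch of the assumed shape of that hook, simplified and not the verbatim SCEV implementation:

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/PassManager.h"

    struct SCEVLikeResult {
      // Report ourselves stale whenever an analysis we hold pointers into
      // went away; the manager then recomputes us on the next request, which
      // is the Running/Invalidating/Running sequence the CHECK lines expect.
      bool invalidate(llvm::Function &F, const llvm::PreservedAnalyses &PA,
                      llvm::FunctionAnalysisManager::Invalidator &Inv) {
        return Inv.invalidate<llvm::AssumptionAnalysis>(F, PA) ||
               Inv.invalidate<llvm::DominatorTreeAnalysis>(F, PA) ||
               Inv.invalidate<llvm::LoopAnalysis>(F, PA);
      }
    };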
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: opt < %s -passes='require<scalar-evolution>,invalidate<assumptions>,print<scalar-evolution>' \
+; RUN:     -debug-pass-manager -disable-output 2>&1 \
+; RUN:     | FileCheck %s -check-prefixes=CHECK,CHECK-AC-INVALIDATE
+;
+; CHECK-AC-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-AC-INVALIDATE: Running analysis: ScalarEvolutionAnalysis
+; CHECK-AC-INVALIDATE: Running analysis: AssumptionAnalysis
+; CHECK-AC-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-AC-INVALIDATE: Invalidating analysis: AssumptionAnalysis
+; CHECK-AC-INVALIDATE: Running pass: ScalarEvolutionPrinterPass
+; CHECK-AC-INVALIDATE: Running analysis: ScalarEvolutionAnalysis
+; CHECK-AC-INVALIDATE: Running analysis: AssumptionAnalysis
+
+; RUN: opt < %s -passes='require<scalar-evolution>,invalidate<domtree>,print<scalar-evolution>' \
+; RUN:     -debug-pass-manager -disable-output 2>&1 \
+; RUN:     | FileCheck %s -check-prefixes=CHECK,CHECK-DT-INVALIDATE
+;
+; CHECK-DT-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-DT-INVALIDATE: Running analysis: ScalarEvolutionAnalysis
+; CHECK-DT-INVALIDATE: Running analysis: DominatorTreeAnalysis
+; CHECK-DT-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-DT-INVALIDATE: Invalidating analysis: DominatorTreeAnalysis
+; CHECK-DT-INVALIDATE: Running pass: ScalarEvolutionPrinterPass
+; CHECK-DT-INVALIDATE: Running analysis: ScalarEvolutionAnalysis
+; CHECK-DT-INVALIDATE: Running analysis: DominatorTreeAnalysis
+
+; RUN: opt < %s -passes='require<scalar-evolution>,invalidate<loops>,print<scalar-evolution>' \
+; RUN:     -debug-pass-manager -disable-output 2>&1 \
+; RUN:     | FileCheck %s -check-prefixes=CHECK,CHECK-LI-INVALIDATE
+;
+; CHECK-LI-INVALIDATE: Running pass: RequireAnalysisPass
+; CHECK-LI-INVALIDATE: Running analysis: ScalarEvolutionAnalysis
+; CHECK-LI-INVALIDATE: Running analysis: LoopAnalysis
+; CHECK-LI-INVALIDATE: Running pass: InvalidateAnalysisPass
+; CHECK-LI-INVALIDATE: Invalidating analysis: LoopAnalysis
+; CHECK-LI-INVALIDATE: Running pass: ScalarEvolutionPrinterPass
+; CHECK-LI-INVALIDATE: Running analysis: ScalarEvolutionAnalysis
+; CHECK-LI-INVALIDATE: Running analysis: LoopAnalysis
+
+; This test isn't particularly interesting, it's just enough to make sure we
+; actually do some work inside of SCEV so that if we regress here despite the
+; debug pass printing continuing to match, ASan and other tools can catch it.
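For readers unfamiliar with the textual pipeline syntax in those RUN lines: `require<scalar-evolution>` forces the analysis to run, `invalidate<...>` clears one of its dependencies, and `print<scalar-evolution>` forces a re-query. A hypothetical C++ equivalent of the first pipeline, using the new-PM driver types from llvm/IR/PassManager.h and PassBuilder (error handling elided, `F` assumed to be the Function under test):

    PassBuilder PB;
    FunctionAnalysisManager FAM;
    PB.registerFunctionAnalyses(FAM);  // registers SCEV, domtree, loops, ...

    FunctionPassManager FPM;
    FPM.addPass(RequireAnalysisPass<ScalarEvolutionAnalysis, Function>());
    FPM.addPass(InvalidateAnalysisPass<AssumptionAnalysis>());
    FPM.addPass(ScalarEvolutionPrinterPass(llvm::errs()));
    FPM.run(F, FAM);  // FileCheck then matches the -debug-pass-manager log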
+define void @test(i32 %n) { +; CHECK-LABEL: Classifying expressions for: @test +; CHECK: Loop %loop: backedge-taken count is 14 +; CHECK: Loop %loop: max backedge-taken count is 14 +; CHECK: Loop %loop: Predicated backedge-taken count is 14 + +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ] + %iv.inc = add nsw i32 %iv, 3 + %becond = icmp ne i32 %iv.inc, 46 + br i1 %becond, label %loop, label %leave + +leave: + ret void +} diff --git a/test/Analysis/ValueTracking/assume.ll b/test/Analysis/ValueTracking/assume.ll index 4bffe8ef7909..fe0ee53eb416 100644 --- a/test/Analysis/ValueTracking/assume.ll +++ b/test/Analysis/ValueTracking/assume.ll @@ -1,14 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @assume_add(i32 %a, i32 %b) { ; CHECK-LABEL: @assume_add( - %1 = add i32 %a, %b - %last_two_digits = and i32 %1, 3 - %2 = icmp eq i32 %last_two_digits, 0 - call void @llvm.assume(i1 %2) - %3 = add i32 %1, 3 -; CHECK: %3 = or i32 %1, 3 - ret i32 %3 +; CHECK-NEXT: [[T1:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[LAST_TWO_DIGITS:%.*]] = and i32 [[T1]], 3 +; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[LAST_TWO_DIGITS]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = or i32 [[T1]], 3 +; CHECK-NEXT: ret i32 [[T3]] +; + %t1 = add i32 %a, %b + %last_two_digits = and i32 %t1, 3 + %t2 = icmp eq i32 %last_two_digits, 0 + call void @llvm.assume(i1 %t2) + %t3 = add i32 %t1, 3 + ret i32 %t3 } declare void @llvm.assume(i1) + diff --git a/test/Bindings/Go/lit.local.cfg b/test/Bindings/Go/lit.local.cfg index d68d867fb308..a587f88f54aa 100644 --- a/test/Bindings/Go/lit.local.cfg +++ b/test/Bindings/Go/lit.local.cfg @@ -6,7 +6,7 @@ import sys if not 'go' in config.root.llvm_bindings: config.unsupported = True -if config.root.include_go_tests != 'ON': +if not config.root.include_go_tests: config.unsupported = True def find_executable(executable, path=None): diff --git a/test/Bindings/OCaml/lit.local.cfg b/test/Bindings/OCaml/lit.local.cfg index 7a83ca142808..fd9e1c50e990 100644 --- a/test/Bindings/OCaml/lit.local.cfg +++ b/test/Bindings/OCaml/lit.local.cfg @@ -3,5 +3,5 @@ config.suffixes = ['.ml'] if not 'ocaml' in config.root.llvm_bindings: config.unsupported = True -if config.root.have_ocaml_ounit not in ('1', 'TRUE'): +if not config.root.have_ocaml_ounit: config.unsupported = True diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 635197bc9ddd..c1667049f80f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,14 @@ -if(LLVM_BUILD_EXAMPLES) - set(ENABLE_EXAMPLES 1) -endif() +llvm_canonicalize_cmake_booleans( + LLVM_TOOL_LTO_BUILD + HAVE_OCAMLOPT + HAVE_OCAML_OUNIT + LLVM_INCLUDE_GO_TESTS + LLVM_USE_INTEL_JITEVENTS + HAVE_LIBZ + HAVE_LIBXAR + LLVM_ENABLE_DIA_SDK + LLVM_ENABLE_FFI + BUILD_SHARED_LIBS) configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in diff --git a/test/CodeGen/AMDGPU/load-constant-i16.ll b/test/CodeGen/AMDGPU/load-constant-i16.ll index 628d285141bc..eb79767e62be 100644 --- a/test/CodeGen/AMDGPU/load-constant-i16.ll +++ b/test/CodeGen/AMDGPU/load-constant-i16.ll @@ -137,8 +137,8 @@ define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x ; v2i16 is naturally 4 byte aligned ; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 -; TODO: This should use DST, but for some there are redundant MOVs -; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal +; EG: 
BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal +; EG: 16 ; EG: 16 define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 { %load = load <2 x i16>, <2 x i16> addrspace(2)* %in @@ -153,11 +153,11 @@ define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x ; GCN-DAG: s_sext_i32_i16 ; v2i16 is naturally 4 byte aligned +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, ; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; TODO: We should also use ASHR instead of LSHR + BFE -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, [[DST]], 0.0, literal +; TODO: We should use ASHR instead of LSHR + BFE +; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal ; EG-DAG: 16 ; EG-DAG: 16 define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 { @@ -167,16 +167,23 @@ define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x ret void } -; FUNC-LABEL: {{^}}constant_constant_zextload_v3i16_to_v3i32: +; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32: ; GCN: s_load_dwordx2 ; v3i16 is naturally 8 byte aligned -; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1 -; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1 +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}}, +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}}, +; EG: CF_END +; EG-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 +; EG-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1 ; TODO: This should use DST, but for some there are redundant MOVs -; EG: LSHR {{[* ]*}}{{T[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal -; EG: 16 -define void @constant_constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { +; EG-DAG: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: 16 +; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: 65535 +; EG-DAG: 65535 +define void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in %ext = zext <3 x i16> %ld to <3 x i32> @@ -184,19 +191,20 @@ entry: ret void } -; FUNC-LABEL: {{^}}constant_constant_sextload_v3i16_to_v3i32: +; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32: ; GCN: s_load_dwordx2 +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}}, +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}}, ; v3i16 is naturally 8 byte aligned -; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1 -; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: ASHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, 0.0, literal +; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[PTR:T[0-9]\.[XYZW]]], 0, #1 +; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1 +; EG-DAG: ASHR 
{{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal ; EG-DAG: 16 ; EG-DAG: 16 -define void @constant_constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { +define void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in %ext = sext <3 x i16> %ld to <3 x i32> @@ -204,20 +212,24 @@ entry: ret void } -; FUNC-LABEL: {{^}}constant_constant_zextload_v4i16_to_v4i32: +; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32: ; GCN: s_load_dwordx2 ; GCN-DAG: s_and_b32 ; GCN-DAG: s_lshr_b32 ; v4i16 is naturally 8 byte aligned -; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}} +; EG: VTX_READ_64 [[LD:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: This should use LD, but for some there are redundant MOVs +; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].Y, {{.*\.[XYZW]}}, literal +; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].W, {{.*\.[XYZW]}}, literal ; EG-DAG: 16 -; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal -; EG-DAG: AND_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal ; EG-DAG: 16 -define void @constant_constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 { +; EG-DAG: AND_INT {{[* ]*}}[[ST]].X, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: 65535 +; EG-DAG: 65535 +define void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 { %load = load <4 x i16>, <4 x i16> addrspace(2)* %in %ext = zext <4 x i16> %load to <4 x i32> store <4 x i32> %ext, <4 x i32> addrspace(1)* %out @@ -230,13 +242,14 @@ define void @constant_constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* % ; GCN-DAG: s_sext_i32_i16 ; v4i16 is naturally 8 byte aligned -; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EG: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: This should use LD, but for some there are redundant MOVs +; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Z, {{.*}}, 0.0, literal ; TODO: We should use ASHR instead of LSHR + BFE -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST]].W, {{.*}}, 0.0, literal ; EG-DAG: 16 ; EG-DAG: 16 ; EG-DAG: 16 @@ -254,24 +267,27 @@ define void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x ; GCN-DAG: s_lshr_b32 ; v8i16 is naturally 16 byte aligned -; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, 
{{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}}, +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}}, +; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: These should use LSHR instead of BFE_UINT +; TODO: This should use DST, but for some there are redundant MOVs +; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].Y, {{.*}}, literal +; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].W, {{.*}}, literal +; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].Y, {{.*}}, literal +; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].W, {{.*}}, literal +; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, literal +; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, literal +; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, literal +; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, literal ; EG-DAG: 16 ; EG-DAG: 16 ; EG-DAG: 16 ; EG-DAG: 16 +; EG-DAG: 65535 +; EG-DAG: 65535 +; EG-DAG: 65535 +; EG-DAG: 65535 define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 { %load = load <8 x i16>, <8 x i16> addrspace(2)* %in %ext = zext <8 x i16> %load to <8 x i32> @@ -285,17 +301,19 @@ define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x ; GCN-DAG: s_sext_i32_i16 ; v8i16 is naturally 16 byte aligned -; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; TODO: We should use ASHR instead of LSHR + BFE -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}}, +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}}, +; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT +; TODO: This should use DST, but for some there are redundant MOVs +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].W, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Y, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].W, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, 0.0, literal ; EG-DAG: 16 ; EG-DAG: 16 ; EG-DAG: 16 @@ -444,7 +462,7 @@ define void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace( ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 ; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal -; 
TODO: Why not 15 ? +; TODO: These could be expanded earlier using ASHR 15 ; EG: 31 define void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 { %a = load i16, i16 addrspace(2)* %in @@ -468,7 +486,7 @@ define void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 ; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal -; TODO: Why not 15 ? +; TODO: These could be expanded earlier using ASHR 15 ; EG: 31 define void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(2)* %in diff --git a/test/CodeGen/AMDGPU/load-global-i16.ll b/test/CodeGen/AMDGPU/load-global-i16.ll index f398dd32e06d..7bd131e6516c 100644 --- a/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/test/CodeGen/AMDGPU/load-global-i16.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s ; FIXME: r600 is broken because the bigger testcases spill and it's not implemented @@ -10,7 +10,7 @@ ; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}} ; GCN-HSA: flat_load_ushort -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: %ld = load i16, i16 addrspace(1)* %in @@ -22,7 +22,7 @@ entry: ; GCN-NOHSA: buffer_load_dword v ; GCN-HSA: flat_load_dword v -; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { entry: %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in @@ -34,8 +34,8 @@ entry: ; GCN-NOHSA: buffer_load_dwordx2 v ; GCN-HSA: flat_load_dwordx2 v -; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 -; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1 +; EGCM-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1 define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in @@ -47,7 +47,7 @@ entry: ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { entry: %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in @@ -59,7 +59,7 @@ entry: ; GCN-NOHSA: buffer_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> 
addrspace(1)* %in) { entry: %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in @@ -74,8 +74,8 @@ entry: ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) { entry: %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in @@ -90,7 +90,7 @@ entry: ; GCN-HSA: flat_load_ushort ; GCN-HSA: flat_store_dword -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { %a = load i16, i16 addrspace(1)* %in %ext = zext i16 %a to i32 @@ -105,9 +105,9 @@ define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1) ; GCN-HSA: flat_load_sshort ; GCN-HSA: flat_store_dword -; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1 -; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal -; EG: 16 +; EGCM: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1 +; EGCM: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal +; EGCM: 16 define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { %a = load i16, i16 addrspace(1)* %in %ext = sext i16 %a to i32 @@ -119,7 +119,7 @@ define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1) ; GCN-NOHSA: buffer_load_ushort ; GCN-HSA: flat_load_ushort -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(1)* %in %ext = zext <1 x i16> %load to <1 x i32> @@ -131,9 +131,9 @@ define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i ; GCN-NOHSA: buffer_load_sshort ; GCN-HSA: flat_load_sshort -; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1 -; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal -; EG: 16 +; EGCM: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1 +; EGCM: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal +; EGCM: 16 define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(1)* %in %ext = sext <1 x i16> %load to <1 x i32> @@ -145,10 +145,9 @@ define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i ; GCN-NOHSA: buffer_load_dword ; GCN-HSA: flat_load_dword -; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 -; TODO: This should use DST, but for some there are redundant MOVs -; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal -; EG: 16 +; EGCM: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 +; EGCM: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal +; EGCM: 16 define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %load = load <2 x i16>, <2 x i16> addrspace(1)* %in %ext = zext <2 x i16> %load to <2 x i32> @@ -161,13 +160,14 @@ define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; GCN-HSA: flat_load_dword -; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 -; TODO: These should use DST, 
but for some there are redundant MOVs -; TODO: We should also use ASHR instead of LSHR + BFE -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, 0.0, literal -; EG-DAG: 16 -; EG-DAG: 16 +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EGCM: VTX_READ_32 [[DST:T[0-9].[XYZW]]], [[DST]], 0, #1 +; TODO: This should use ASHR instead of LSHR + BFE +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].X, [[DST]], 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV.[XYZW]}}, 0.0, literal +; EGCM-DAG: 16 +; EGCM-DAG: 16 define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %load = load <2 x i16>, <2 x i16> addrspace(1)* %in %ext = sext <2 x i16> %load to <2 x i32> @@ -175,16 +175,22 @@ define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ret void } -; FUNC-LABEL: {{^}}global_global_zextload_v3i16_to_v3i32: +; FUNC-LABEL: {{^}}global_zextload_v3i16_to_v3i32: ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1 -; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1 +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, +; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1 ; TODO: This should use DST, but for some there are redundant MOVs -; EG: LSHR {{[* ]*}}{{T[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal -; EG: 16 -define void @global_global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { +; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal +; EGCM: 16 +; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal +; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal +define void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in %ext = zext <3 x i16> %ld to <3 x i32> @@ -192,19 +198,23 @@ entry: ret void } -; FUNC-LABEL: {{^}}global_global_sextload_v3i16_to_v3i32: +; FUNC-LABEL: {{^}}global_sextload_v3i16_to_v3i32: ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1 -; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: ASHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, 0.0, literal -; EG-DAG: 16 -; EG-DAG: 16 -define void @global_global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, +; EGCM-DAG: VTX_READ_32 
[[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 4, #1 +; TODO: This should use DST, but for some there are redundant MOVs +; EGCM-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], 0.0, literal +; EGCM-DAG: 16 +; EGCM-DAG: 16 +define void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in %ext = sext <3 x i16> %ld to <3 x i32> @@ -212,19 +222,22 @@ entry: ret void } -; FUNC-LABEL: {{^}}global_global_zextload_v4i16_to_v4i32: +; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i32: ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal -; EG-DAG: 16 -; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal -; EG-DAG: AND_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal -; EG-DAG: 16 -define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EGCM: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: This should use DST, but for some there are redundant MOVs +; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST]].Y, {{.*}}, literal +; EGCM-DAG: 16 +; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST]].W, {{.*}}, literal +; EGCM-DAG: AND_INT {{[* ]*}}[[ST]].X, {{.*}}, literal +; EGCM-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{.*}}, literal +; EGCM-DAG: 16 +define void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %load = load <4 x i16>, <4 x i16> addrspace(1)* %in %ext = zext <4 x i16> %load to <4 x i32> store <4 x i32> %ext, <4 x i32> addrspace(1)* %out @@ -236,17 +249,19 @@ define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, ; GCN-HSA: flat_load_dwordx2 -; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EGCM: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1 ; TODO: We should use ASHR instead of LSHR + BFE -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 +; TODO: This should use DST, but for some there are redundant MOVs +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].X, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Z, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].W, {{.*}}, 0.0, literal +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %load = load <4 x i16>, <4 x i16> addrspace(1)* %in %ext = sext <4 x 
i16> %load to <4 x i32> @@ -258,16 +273,29 @@ define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i ; GCN-NOHSA: buffer_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 +; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EGCM: CF_END +; EGCM: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: These should use LSHR instead of BFE_UINT +; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].Y, {{.*}}, literal +; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].W, {{.*}}, literal +; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].Y, {{.*}}, literal +; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].W, {{.*}}, literal +; EGCM-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, literal +; EGCM-DAG: AND_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, literal +; EGCM-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, literal +; EGCM-DAG: AND_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, literal +; EGCM-DAG: 65535 +; EGCM-DAG: 65535 +; EGCM-DAG: 65535 +; EGCM-DAG: 65535 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { %load = load <8 x i16>, <8 x i16> addrspace(1)* %in %ext = zext <8 x i16> %load to <8 x i32> @@ -279,24 +307,29 @@ define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i ; GCN-NOHSA: buffer_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9].[XYZW]}}, 0, #1 -; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 -; EG-DAG: 16 +; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]], {{T[0-9]\.[XYZW]}} +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, +; EGCM: CF_END +; EGCM: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1 +; TODO: These should use ASHR instead of LSHR + BFE_INT +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].W, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Y, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].W, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, 
0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, 0.0, literal +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 +; EGCM-DAG: 16 define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { %load = load <8 x i16>, <8 x i16> addrspace(1)* %in %ext = sext <8 x i16> %load to <8 x i32> @@ -311,8 +344,8 @@ define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { %load = load <16 x i16>, <16 x i16> addrspace(1)* %in %ext = zext <16 x i16> %load to <16 x i32> @@ -322,8 +355,8 @@ define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 ; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32: -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { %load = load <16 x i16>, <16 x i16> addrspace(1)* %in %ext = sext <16 x i16> %load to <16 x i32> @@ -342,10 +375,10 @@ define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { %load = load <32 x i16>, <32 x i16> addrspace(1)* %in %ext = zext <32 x i16> %load to <32 x i32> @@ -364,10 +397,10 @@ define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 define void @global_sextload_v32i16_to_v32i32(<32 x i32> 
addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { %load = load <32 x i16>, <32 x i16> addrspace(1)* %in %ext = sext <32 x i16> %load to <32 x i32> @@ -394,14 +427,14 @@ define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 64, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 64, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1 define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { %load = load <64 x i16>, <64 x i16> addrspace(1)* %in %ext = zext <64 x i16> %load to <64 x i32> @@ -411,14 +444,14 @@ define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 ; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32: -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 64, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1 -; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 64, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1 +; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1 define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { %load = load <64 x i16>, <64 x i16> addrspace(1)* %in %ext = sext <64 x i16> %load to <64 x i32> @@ -434,8 +467,8 @@ define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 -; EG: MOV {{.*}}, 0.0 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: MOV {{.*}}, 
0.0 define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { %a = load i16, i16 addrspace(1)* %in %ext = zext i16 %a to i64 @@ -458,10 +491,10 @@ define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1) ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 -; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal -; TODO: Why not 15 ? -; EG: 31 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal +; TODO: These could be expanded earlier using ASHR 15 +; EGCM: 31 define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { %a = load i16, i16 addrspace(1)* %in %ext = sext i16 %a to i64 @@ -471,8 +504,8 @@ define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1) ; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64: -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 -; EG: MOV {{.*}}, 0.0 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: MOV {{.*}}, 0.0 define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(1)* %in %ext = zext <1 x i16> %load to <1 x i64> @@ -482,10 +515,10 @@ define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i ; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64: -; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 -; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal -; TODO: Why not 15 ? -; EG: 31 +; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal +; TODO: These could be expanded earlier using ASHR 15 +; EGCM: 31 define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(1)* %in %ext = sext <1 x i16> %load to <1 x i64> @@ -503,7 +536,7 @@ define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64: -; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %load = load <2 x i16>, <2 x i16> addrspace(1)* %in %ext = sext <2 x i16> %load to <2 x i64> @@ -513,7 +546,7 @@ define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64: -; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %load = load <4 x i16>, <4 x i16> addrspace(1)* %in %ext = zext <4 x i16> %load to <4 x i64> @@ -523,7 +556,7 @@ define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i ; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64: -; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %load = load <4 x i16>, <4 x i16> addrspace(1)* %in %ext = sext <4 x i16> %load to <4 x i64> @@ -533,7 +566,7 @@ define void @global_sextload_v4i16_to_v4i64(<4 x i64> 
addrspace(1)* %out, <4 x i ; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64: -; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { %load = load <8 x i16>, <8 x i16> addrspace(1)* %in %ext = zext <8 x i16> %load to <8 x i64> @@ -543,7 +576,7 @@ define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i ; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64: -; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { %load = load <8 x i16>, <8 x i16> addrspace(1)* %in %ext = sext <8 x i16> %load to <8 x i64> @@ -553,8 +586,8 @@ define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i ; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64: -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { %load = load <16 x i16>, <16 x i16> addrspace(1)* %in %ext = zext <16 x i16> %load to <16 x i64> @@ -564,8 +597,8 @@ define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 ; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64: -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { %load = load <16 x i16>, <16 x i16> addrspace(1)* %in %ext = sext <16 x i16> %load to <16 x i64> @@ -575,10 +608,10 @@ define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 ; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64: -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1 define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { %load = load <32 x i16>, <32 x i16> addrspace(1)* %in %ext = zext <32 x i16> %load to <32 x i64> @@ -588,10 +621,10 @@ define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 ; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64: -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1 -; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 +; EGCM-DAG: 
VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1 +; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1 define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { %load = load <32 x i16>, <32 x i16> addrspace(1)* %in %ext = sext <32 x i16> %load to <32 x i64> diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll index 5d64a152af3c..13d56535303f 100644 --- a/test/CodeGen/AMDGPU/min.ll +++ b/test/CodeGen/AMDGPU/min.ll @@ -1,10 +1,9 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - ; FUNC-LABEL: {{^}}v_test_imin_sle_i32: -; SI: v_min_i32_e32 +; GCN: v_min_i32_e32 ; EG: MIN_INT define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -17,7 +16,7 @@ define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr } ; FUNC-LABEL: {{^}}s_test_imin_sle_i32: -; SI: s_min_i32 +; GCN: s_min_i32 ; EG: MIN_INT define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -28,7 +27,7 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin } ; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32: -; SI: s_min_i32 +; GCN: s_min_i32 ; EG: MIN_INT define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { @@ -39,10 +38,10 @@ define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, < } ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32: -; SI: s_min_i32 -; SI: s_min_i32 -; SI: s_min_i32 -; SI: s_min_i32 +; GCN: s_min_i32 +; GCN: s_min_i32 +; GCN: s_min_i32 +; GCN: s_min_i32 ; EG: MIN_INT ; EG: MIN_INT @@ -56,11 +55,11 @@ define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, < } ; FUNC-LABEL: {{^}}s_test_imin_sle_i8: -; SI: s_load_dword -; SI: s_load_dword -; SI: s_sext_i32_i8 -; SI: s_sext_i32_i8 -; SI: s_min_i32 +; GCN: s_load_dword +; GCN: s_load_dword +; GCN: s_sext_i32_i8 +; GCN: s_sext_i32_i8 +; GCN: s_min_i32 define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { %cmp = icmp sle i8 %a, %b %val = select i1 %cmp, i8 %a, i8 %b @@ -72,21 +71,26 @@ define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { ; extloads with mubuf instructions. 
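Behind the prefix shuffle in this file (SI checks that also hold on VI/tonga become GCN), every block tests the same canonical pattern: a compare whose result feeds a select of the same two operands, which the backend is expected to match to a native min. In C++ terms the IR each function instantiates is nothing more than the following (illustrative only, not part of the patch):

    // icmp sle/slt + select  ->  s_min_i32 / v_min_i32_e32 on GCN, MIN_INT on r600
    int smin(int a, int b) { return a <= b ? a : b; }
    // icmp ult/ule + select  ->  s_min_u32 / v_min_u32_e32 on GCN, MIN_UINT on r600
    unsigned umin(unsigned a, unsigned b) { return a < b ? a : b; }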
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8: -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte -; SI: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte +; GCN: buffer_load_sbyte ; SI: v_min_i32 ; SI: v_min_i32 ; SI: v_min_i32 ; SI: v_min_i32 -; SI: s_endpgm +; VI: v_min_i32 +; VI: v_min_i32 +; VI: v_min_i32 +; VI: v_min_i32 + +; GCN: s_endpgm ; EG: MIN_INT ; EG: MIN_INT @@ -117,7 +121,7 @@ define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, < } ; FUNC-LABEL: @v_test_imin_slt_i32 -; SI: v_min_i32_e32 +; GCN: v_min_i32_e32 ; EG: MIN_INT define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -130,7 +134,7 @@ define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr } ; FUNC-LABEL: @s_test_imin_slt_i32 -; SI: s_min_i32 +; GCN: s_min_i32 ; EG: MIN_INT define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -141,8 +145,8 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin } ; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32: -; SI: s_min_i32 -; SI: s_min_i32 +; GCN: s_min_i32 +; GCN: s_min_i32 ; EG: MIN_INT ; EG: MIN_INT @@ -154,7 +158,7 @@ define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, < } ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32: -; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 +; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}} define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -165,7 +169,7 @@ define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { } ; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32: -; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 +; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 ; EG: MIN_INT {{.*}}literal.{{[xyzw]}} define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -176,7 +180,7 @@ define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { } ; FUNC-LABEL: @v_test_umin_ule_i32 -; SI: v_min_u32_e32 +; GCN: v_min_u32_e32 ; EG: MIN_UINT define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -189,11 +193,11 @@ define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr } ; FUNC-LABEL: @v_test_umin_ule_v3i32 -; SI: v_min_u32_e32 -; SI: v_min_u32_e32 -; SI: v_min_u32_e32 +; GCN: v_min_u32_e32 +; GCN: v_min_u32_e32 +; GCN: v_min_u32_e32 ; SI-NOT: v_min_u32_e32 -; SI: s_endpgm +; GCN: s_endpgm ; EG: MIN_UINT ; EG: MIN_UINT @@ -207,7 +211,7 @@ define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrs ret void } ; FUNC-LABEL: @s_test_umin_ule_i32 -; SI: s_min_u32 +; GCN: s_min_u32 ; EG: MIN_UINT define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -218,7 +222,7 @@ define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin } ; FUNC-LABEL: @v_test_umin_ult_i32 -; SI: v_min_u32_e32 +; GCN: v_min_u32_e32 ; EG: MIN_UINT define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -231,9 +235,9 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr } ; 
FUNC-LABEL: {{^}}v_test_umin_ult_i8: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: v_min_u32_e32 +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: v_min_u32_e32 ; EG: MIN_UINT define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { @@ -246,7 +250,7 @@ define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i } ; FUNC-LABEL: @s_test_umin_ult_i32 -; SI: s_min_u32 +; GCN: s_min_u32 ; EG: MIN_UINT define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -258,10 +262,10 @@ define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ; FUNC-LABEL: @v_test_umin_ult_i32_multi_use ; SI-NOT: v_min -; SI: v_cmp_lt_u32 +; GCN: v_cmp_lt_u32 ; SI-NEXT: v_cndmask_b32 ; SI-NOT: v_min -; SI: s_endpgm +; GCN: s_endpgm ; EG-NOT: MIN_UINT define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -274,9 +278,27 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace ret void } +; FUNC-LABEL: @v_test_umin_ult_i16_multi_use +; GCN-NOT: v_min +; GCN: v_cmp_lt_u32 +; GCN-NEXT: v_cndmask_b32 +; GCN-NOT: v_min +; GCN: s_endpgm + +; EG-NOT: MIN_UINT +define void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %a = load i16, i16 addrspace(1)* %aptr, align 2 + %b = load i16, i16 addrspace(1)* %bptr, align 2 + %cmp = icmp ult i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out0, align 2 + store i1 %cmp, i1 addrspace(1)* %out1 + ret void +} + ; FUNC-LABEL: @s_test_umin_ult_v1i32 -; SI: s_min_u32 +; GCN: s_min_u32 ; EG: MIN_UINT define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { @@ -287,14 +309,14 @@ define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, < } ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32: -; SI: s_min_u32 -; SI: s_min_u32 -; SI: s_min_u32 -; SI: s_min_u32 -; SI: s_min_u32 -; SI: s_min_u32 -; SI: s_min_u32 -; SI: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 +; GCN: s_min_u32 ; EG: MIN_UINT ; EG: MIN_UINT @@ -312,14 +334,14 @@ define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, < } ; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16: -; SI: v_min_u32 -; SI: v_min_u32 -; SI: v_min_u32 -; SI: v_min_u32 -; SI: v_min_u32 -; SI: v_min_u32 -; SI: v_min_u32 -; SI: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 +; GCN: v_min_u32 ; EG: MIN_UINT ; EG: MIN_UINT @@ -338,11 +360,11 @@ define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, < ; Make sure redundant and removed ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16: -; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb -; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]] -; SI: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] -; SI: buffer_store_dword [[VMIN]] +; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} +; GCN: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]] +; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], 
[[MIN]] +; GCN: buffer_store_dword [[VMIN]] ; EG: MIN_UINT define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind { @@ -358,11 +380,11 @@ define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i1 ; Make sure the redundant sign_extend_inreg is removed. ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16: -; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb -; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]] -; SI: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] -; SI: buffer_store_dword [[VMIN]] +; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} +; GCN: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]] +; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] +; GCN: buffer_store_dword [[VMIN]] ; EG: MIN_INT define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind { @@ -377,7 +399,7 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 } ; FUNC-LABEL: {{^}}s_test_imin_sle_i16: -; SI: s_min_i32 +; GCN: s_min_i32 ; EG: MIN_INT define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { @@ -389,7 +411,7 @@ define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwin ; 64 bit ; FUNC-LABEL: {{^}}test_umin_ult_i64 -; SI: s_endpgm +; GCN: s_endpgm ; EG: MIN_UINT ; EG: MIN_UINT @@ -401,7 +423,7 @@ define void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind } ; FUNC-LABEL: {{^}}test_umin_ule_i64 -; SI: s_endpgm +; GCN: s_endpgm ; EG: MIN_UINT ; EG: MIN_UINT @@ -413,7 +435,7 @@ define void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind } ; FUNC-LABEL: {{^}}test_imin_slt_i64 -; SI: s_endpgm +; GCN: s_endpgm ; EG-DAG: MIN_UINT ; EG-DAG: MIN_INT @@ -425,7 +447,7 @@ define void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind } ; FUNC-LABEL: {{^}}test_imin_sle_i64 -; SI: s_endpgm +; GCN: s_endpgm ; EG-DAG: MIN_UINT ; EG-DAG: MIN_INT diff --git a/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll new file mode 100644 index 000000000000..866a4a9191e2 --- /dev/null +++ b/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=r600 -mcpu=cypress -start-after safe-stack %s -o - | FileCheck %s +; Don't crash + +; CHECK: MAX_UINT +define void @test(i64 addrspace(1)* %out) { +bb: + store i64 2, i64 addrspace(1)* %out + %tmp = load i64, i64 addrspace(1)* %out + br label %jump + +jump: ; preds = %bb + %tmp1 = icmp ugt i64 %tmp, 4 + %umax = select i1 %tmp1, i64 %tmp, i64 4 + store i64 %umax, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/store-private.ll b/test/CodeGen/AMDGPU/store-private.ll new file mode 100644 index 000000000000..33d27f24e9cf --- /dev/null +++ b/test/CodeGen/AMDGPU/store-private.ll @@ -0,0 +1,743 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}store_i1: +; EG: MOVA_INT +; EG: MOV {{[\*
]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_byte +define void @store_i1(i1 addrspace(0)* %out) { +entry: + store i1 true, i1 addrspace(0)* %out + ret void +} + +; i8 store +; FUNC-LABEL: {{^}}store_i8: +; EG: LSHR * [[ADDRESS:T[0-9]\.[XYZW]]], KC0[2].Y, literal.x +; EG-NEXT: 2 +; EG: MOVA_INT * AR.x (MASKED) +; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x + +; IG 0: Get the byte index and truncate the value +; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG-NEXT: 3(4.203895e-45) +; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x +; EG-NEXT: 255(3.573311e-43) + +; EG: NOT_INT +; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]] +; EG: OR_INT * [[RES:T[0-9]\.[XYZW]]] +; TODO: Is the reload necessary? +; EG: MOVA_INT * AR.x (MASKED), [[ADDRESS]] +; EG: MOV * T(0 + AR.x).X+, [[RES]] + +; SI: buffer_store_byte + +define void @store_i8(i8 addrspace(0)* %out, i8 %in) { +entry: + store i8 %in, i8 addrspace(0)* %out + ret void +} + +; i16 store +; FUNC-LABEL: {{^}}store_i16: +; EG: LSHR * [[ADDRESS:T[0-9]\.[XYZW]]], KC0[2].Y, literal.x +; EG-NEXT: 2 +; EG: MOVA_INT * AR.x (MASKED) +; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x + +; IG 0: Get the byte index and truncate the value +; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG-NEXT: 3(4.203895e-45) +; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x +; EG-NEXT: 65535(9.183409e-41) + +; EG: NOT_INT +; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]] +; EG: OR_INT * [[RES:T[0-9]\.[XYZW]]] +; TODO: Is the reload necessary? 
+; EG: MOVA_INT * AR.x (MASKED), [[ADDRESS]] +; EG: MOV * T(0 + AR.x).X+, [[RES]] + +; SI: buffer_store_short +define void @store_i16(i16 addrspace(0)* %out, i16 %in) { +entry: + store i16 %in, i16 addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_i24: +; SI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16 +; SI-DAG: buffer_store_byte +; SI-DAG: buffer_store_short + +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store can be eliminated +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store can be eliminated +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +define void @store_i24(i24 addrspace(0)* %out, i24 %in) { +entry: + store i24 %in, i24 addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_i25: +; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}} +; SI: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]] +; SI: buffer_store_dword [[VAND]] + +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG-NOT: MOVA_INT + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM-NOT: MOVA_INT +define void @store_i25(i25 addrspace(0)* %out, i25 %in) { +entry: + store i25 %in, i25 addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_v2i8: +; v2i8 is naturally 2B aligned, treat as i16 +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG-NOT: MOVA_INT + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM-NOT: MOVA_INT + +; SI: buffer_store_short +define void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) { +entry: + %0 = trunc <2 x i32> %in to <2 x i8> + store <2 x i8> %0, <2 x i8> addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_v2i8_unaligned: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_byte +define void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) { +entry: + %0 = trunc <2 x i32> %in to <2 x i8> + store <2 x i8> %0, <2 x i8> addrspace(0)* %out, align 1 + ret void +} + + +; FUNC-LABEL: {{^}}store_v2i16: +; v2i16 is naturally 4B aligned, treat as i32 +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG-NOT: MOVA_INT + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM-NOT: MOVA_INT + +; SI: buffer_store_dword +define void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) { +entry: + %0 = trunc <2 x i32> %in to <2 x i16> + store <2 x i16> %0, <2 x i16> addrspace(0)* %out + ret void +} + +; FUNC-LABEL:
{{^}}store_v2i16_unaligned: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_short +; SI: buffer_store_short +define void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) { +entry: + %0 = trunc <2 x i32> %in to <2 x i16> + store <2 x i16> %0, <2 x i16> addrspace(0)* %out, align 2 + ret void +} + +; FUNC-LABEL: {{^}}store_v4i8: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG-NOT: MOVA_INT + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM-NOT: MOVA_INT + +; SI: buffer_store_dword +define void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) { +entry: + %0 = trunc <4 x i32> %in to <4 x i8> + store <4 x i8> %0, <4 x i8> addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_v4i8_unaligned: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI-NOT: buffer_store_dword +define void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) { +entry: + %0 = trunc <4 x i32> %in to <4 x i8> + store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 1 + ret void +} + +; FUNC-LABEL: {{^}}store_v8i8_unaligned: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: 
MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI-NOT: buffer_store_dword +define void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) { +entry: + %0 = trunc <8 x i32> %in to <8 x i8> + store <8 x i8> %0, <8 x i8> addrspace(0)* %out, align 1 + ret void +} + +; FUNC-LABEL: {{^}}store_v4i8_halfaligned: +; EG: MOVA_INT +; EG: MOV {{[\* 
]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; TODO: This load and store cannot be eliminated, +; they might be different locations +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_short +; SI: buffer_store_short +; SI-NOT: buffer_store_dword +define void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) { +entry: + %0 = trunc <4 x i32> %in to <4 x i8> + store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 2 + ret void +} + +; floating-point store +; FUNC-LABEL: {{^}}store_f32: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_dword + +define void @store_f32(float addrspace(0)* %out, float %in) { + store float %in, float addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_v4i16: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x2? +; XSI: buffer_store_dwordx2 +; SI: buffer_store_dword +; SI: buffer_store_dword +define void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) { +entry: + %0 = trunc <4 x i32> %in to <4 x i16> + store <4 x i16> %0, <4 x i16> addrspace(0)* %out + ret void +} + +; vec2 floating-point stores +; FUNC-LABEL: {{^}}store_v2f32: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x2? +; XSI: buffer_store_dwordx2 +; SI: buffer_store_dword +; SI: buffer_store_dword + +define void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) { +entry: + %0 = insertelement <2 x float> undef, float %a, i32 0 + %1 = insertelement <2 x float> %0, float %b, i32 1 + store <2 x float> %1, <2 x float> addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_v3i32: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x2?
+; XSI-DAG: buffer_store_dwordx2 +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword + +define void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind { + store <3 x i32> %a, <3 x i32> addrspace(0)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}store_v4i32: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x4? +; XSI: buffer_store_dwordx4 +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +define void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_v4i32_unaligned: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x4? +; XSI: buffer_store_dwordx4 +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +define void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(0)* %out, align 4 + ret void +} + +; v4f32 store +; FUNC-LABEL: {{^}}store_v4f32: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x4? 
+; XSI: buffer_store_dwordx4 +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +define void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) { + %1 = load <4 x float>, <4 x float> addrspace(0) * %in + store <4 x float> %1, <4 x float> addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_i64_i8: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_byte +define void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) { +entry: + %0 = trunc i64 %in to i8 + store i8 %0, i8 addrspace(0)* %out + ret void +} + +; FUNC-LABEL: {{^}}store_i64_i16: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; SI: buffer_store_short +define void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) { +entry: + %0 = trunc i64 %in to i16 + store i16 %0, i16 addrspace(0)* %out + ret void +} + +; The stores in this function are combined by the optimizer to create a +; 64-bit store with 32-bit alignment. This is legal and the legalizer +; should not try to split the 64-bit store back into 2 32-bit stores. + +; FUNC-LABEL: {{^}}vecload2: +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x2? +; XSI: buffer_store_dwordx2 +; SI: buffer_store_dword +; SI: buffer_store_dword +define void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { +entry: + %0 = load i32, i32 addrspace(2)* %mem, align 4 + %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1 + %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4 + store i32 %0, i32 addrspace(0)* %out, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1 + store i32 %1, i32 addrspace(0)* %arrayidx1, align 4 + ret void +} + +; When i128 was a legal type this program generated cannot select errors: + +; FUNC-LABEL: {{^}}"i128-const-store": +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, +; EG: MOVA_INT +; EG: MOV {{[\* ]*}}T(0 + AR.x).X+, + +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, +; CM: MOVA_INT +; CM: MOV {{[\* ]*}}T(0 + AR.x).X+, + +;TODO: why not x4? 
+; XSI: buffer_store_dwordx4 +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +; SI: buffer_store_dword +define void @i128-const-store(i32 addrspace(0)* %out) { +entry: + store i32 1, i32 addrspace(0)* %out, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1 + store i32 1, i32 addrspace(0)* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 2 + store i32 2, i32 addrspace(0)* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 3 + store i32 2, i32 addrspace(0)* %arrayidx6, align 4 + ret void +} + + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AVR/intrinsics/read_register.ll b/test/CodeGen/AVR/intrinsics/read_register.ll new file mode 100644 index 000000000000..3f28d1d3a9fe --- /dev/null +++ b/test/CodeGen/AVR/intrinsics/read_register.ll @@ -0,0 +1,17 @@ +; RUN: llc -O0 < %s -march=avr | FileCheck %s + +; CHECK-LABEL: foo +define void @foo() { +entry: + %val1 = call i16 @llvm.read_register.i16(metadata !0) + %val2 = call i16 @llvm.read_register.i16(metadata !1) + %val3 = call i8 @llvm.read_register.i8(metadata !2) + ret void +} + +declare i8 @llvm.read_register.i8(metadata) +declare i16 @llvm.read_register.i16(metadata) + +!0 = !{!"r28"} +!1 = !{!"Z"} +!2 = !{!"r0"} diff --git a/test/CodeGen/WebAssembly/function-bitcasts.ll b/test/CodeGen/WebAssembly/function-bitcasts.ll new file mode 100644 index 000000000000..49980da6eb8f --- /dev/null +++ b/test/CodeGen/WebAssembly/function-bitcasts.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test that function pointer casts are replaced with wrappers. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK-LABEL: test: +; CHECK-NEXT: call .Lbitcast@FUNCTION{{$}} +; CHECK-NEXT: call .Lbitcast.1@FUNCTION{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: call .Lbitcast.2@FUNCTION, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.call $drop=, .Lbitcast.3@FUNCTION{{$}} +; CHECK-NEXT: call foo2@FUNCTION{{$}} +; CHECK-NEXT: call foo3@FUNCTION{{$}} +; CHECK-NEXT: .endfunc + +; CHECK-LABEL: .Lbitcast: +; CHECK-NEXT: .local i32 +; CHECK-NEXT: call has_i32_arg@FUNCTION, $0{{$}} +; CHECK-NEXT: .endfunc + +; CHECK-LABEL: .Lbitcast.1: +; CHECK-NEXT: call $drop=, has_i32_ret@FUNCTION{{$}} +; CHECK-NEXT: .endfunc + +; CHECK-LABEL: .Lbitcast.2: +; CHECK-NEXT: .param i32 +; CHECK-NEXT: call foo0@FUNCTION{{$}} +; CHECK-NEXT: .endfunc + +; CHECK-LABEL: .Lbitcast.3: +; CHECK-NEXT: .result i32 +; CHECK-NEXT: .local i32 +; CHECK-NEXT: call foo1@FUNCTION{{$}} +; CHECK-NEXT: copy_local $push0=, $0 +; CHECK-NEXT: .endfunc + +declare void @has_i32_arg(i32) +declare i32 @has_i32_ret() + +declare void @foo0() +declare void @foo1() +declare void @foo2() +declare void @foo3() + +define void @test() { +entry: + call void bitcast (void (i32)* @has_i32_arg to void ()*)() + call void bitcast (i32 ()* @has_i32_ret to void ()*)() + call void bitcast (void ()* @foo0 to void (i32)*)(i32 0) + %t = call i32 bitcast (void ()* @foo1 to i32 ()*)() + call void bitcast (void ()* @foo2 to void ()*)() + call void @foo3() + ret void +} diff --git a/test/CodeGen/WebAssembly/unsupported-function-bitcasts.ll b/test/CodeGen/WebAssembly/unsupported-function-bitcasts.ll new file mode 100644 index 000000000000..ef4318ec299b --- /dev/null +++ b/test/CodeGen/WebAssembly/unsupported-function-bitcasts.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; 
Test that function pointer casts that require conversions are not converted +; to wrappers. In theory some conversions could be supported, but currently no +; conversions are implemented. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK-LABEL: test: +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} +; CHECK-NEXT: call has_i64_arg@FUNCTION, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.call $drop=, has_i64_ret@FUNCTION{{$}} +; CHECK-NEXT: .endfunc + +; CHECK-NOT: .Lbitcast + +declare void @has_i64_arg(i64) +declare i64 @has_i64_ret() + +define void @test() { +entry: + call void bitcast (void (i64)* @has_i64_arg to void (i32)*)(i32 0) + %t = call i32 bitcast (i64 ()* @has_i64_ret to i32 ()*)() + ret void +} diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll index e1341624cad3..aec74424b9b2 100644 --- a/test/CodeGen/X86/avx2-arith.ll +++ b/test/CodeGen/X86/avx2-arith.ll @@ -142,17 +142,108 @@ define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone ret <16 x i16> %x } -define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone { +define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone { +; X32-LABEL: mul_v16i8: +; X32: ## BB#0: +; X32-NEXT: vpmovsxbw %xmm1, %ymm1 +; X32-NEXT: vpmovsxbw %xmm0, %ymm0 +; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0 +; X32-NEXT: vextracti128 $1, %ymm0, %xmm1 +; X32-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; X32-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; X32-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; X32-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: mul_v16i8: +; X64: ## BB#0: +; X64-NEXT: vpmovsxbw %xmm1, %ymm1 +; X64-NEXT: vpmovsxbw %xmm0, %ymm0 +; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0 +; X64-NEXT: vextracti128 $1, %ymm0, %xmm1 +; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; X64-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; X64-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: vzeroupper +; X64-NEXT: retq %x = mul <16 x i8> %i, %j ret <16 x i8> %x } -define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone { +define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone { +; X32-LABEL: mul_v32i8: +; X32: ## BB#0: +; X32-NEXT: vextracti128 $1, %ymm1, %xmm2 +; X32-NEXT: vpmovsxbw %xmm2, %ymm2 +; X32-NEXT: vextracti128 $1, %ymm0, %xmm3 +; X32-NEXT: vpmovsxbw %xmm3, %ymm3 +; X32-NEXT: vpmullw %ymm2, %ymm3, %ymm2 +; X32-NEXT: vextracti128 $1, %ymm2, %xmm3 +; X32-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; X32-NEXT: vpshufb %xmm4, %xmm3, %xmm3 +; X32-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; X32-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X32-NEXT: vpmovsxbw %xmm1, %ymm1 +; X32-NEXT: vpmovsxbw %xmm0, %ymm0 +; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0 +; X32-NEXT: vextracti128 $1, %ymm0, %xmm1 +; X32-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; X32-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; X32-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: mul_v32i8: +; X64: ## BB#0: +; X64-NEXT: vextracti128 $1, %ymm1, %xmm2 +; X64-NEXT: vpmovsxbw %xmm2, %ymm2 +; X64-NEXT: vextracti128 $1, %ymm0, %xmm3 +; X64-NEXT: vpmovsxbw %xmm3, %ymm3 +; X64-NEXT: vpmullw %ymm2, %ymm3, %ymm2 +; X64-NEXT: vextracti128 $1, %ymm2, %xmm3 +; X64-NEXT: vmovdqa {{.*#+}} xmm4 = 
<0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; X64-NEXT: vpshufb %xmm4, %xmm3, %xmm3 +; X64-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; X64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-NEXT: vpmovsxbw %xmm1, %ymm1 +; X64-NEXT: vpmovsxbw %xmm0, %ymm0 +; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0 +; X64-NEXT: vextracti128 $1, %ymm0, %xmm1 +; X64-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; X64-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-NEXT: retq %x = mul <32 x i8> %i, %j ret <32 x i8> %x } -define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone { +define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone { +; X32-LABEL: mul_v4i64: +; X32: ## BB#0: +; X32-NEXT: vpsrlq $32, %ymm0, %ymm2 +; X32-NEXT: vpmuludq %ymm1, %ymm2, %ymm2 +; X32-NEXT: vpsrlq $32, %ymm1, %ymm3 +; X32-NEXT: vpmuludq %ymm3, %ymm0, %ymm3 +; X32-NEXT: vpaddq %ymm2, %ymm3, %ymm2 +; X32-NEXT: vpsllq $32, %ymm2, %ymm2 +; X32-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: mul_v4i64: +; X64: ## BB#0: +; X64-NEXT: vpsrlq $32, %ymm0, %ymm2 +; X64-NEXT: vpmuludq %ymm1, %ymm2, %ymm2 +; X64-NEXT: vpsrlq $32, %ymm1, %ymm3 +; X64-NEXT: vpmuludq %ymm3, %ymm0, %ymm3 +; X64-NEXT: vpaddq %ymm2, %ymm3, %ymm2 +; X64-NEXT: vpsllq $32, %ymm2, %ymm2 +; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; X64-NEXT: retq %x = mul <4 x i64> %i, %j ret <4 x i64> %x } @@ -291,8 +382,8 @@ define <8 x i32> @mul_const9(<8 x i32> %x) { ret <8 x i32> %y } +; %x * 0x01010101 define <4 x i32> @mul_const10(<4 x i32> %x) { - ; %x * 0x01010101 ; X32-LABEL: mul_const10: ; X32: ## BB#0: ; X32-NEXT: vpbroadcastd LCPI22_0, %xmm1 @@ -308,8 +399,8 @@ define <4 x i32> @mul_const10(<4 x i32> %x) { ret <4 x i32> %m } +; %x * 0x80808080 define <4 x i32> @mul_const11(<4 x i32> %x) { - ; %x * 0x80808080 ; X32-LABEL: mul_const11: ; X32: ## BB#0: ; X32-NEXT: vpbroadcastd LCPI23_0, %xmm1 diff --git a/test/CodeGen/X86/avx512-bugfix-23634.ll b/test/CodeGen/X86/avx512-bugfix-23634.ll index 0dcfb7c169f3..e66eefdb8e9f 100644 --- a/test/CodeGen/X86/avx512-bugfix-23634.ll +++ b/test/CodeGen/X86/avx512-bugfix-23634.ll @@ -15,7 +15,7 @@ define void @f_fu(float* %ret, float* %aa, float %b) { ; CHECK-NEXT: vpsrad $1, %zmm2, %zmm2 ; CHECK-NEXT: movw $-21846, %ax ## imm = 0xAAAA ; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vpblendmd {{.*}}(%rip), %zmm1, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1 {%k1} ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll index 532678ae72fa..1a91bc1dee9a 100644 --- a/test/CodeGen/X86/avx512-calling-conv.ll +++ b/test/CodeGen/X86/avx512-calling-conv.ll @@ -25,8 +25,7 @@ define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) { ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1} -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -48,8 +47,7 @@ define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) { ; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL_X32-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL_X32-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1} -; KNL_X32-NEXT: vpternlogd 
$255, %zmm0, %zmm0, %zmm0 -; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0 ; KNL_X32-NEXT: retl %c = and <16 x i1>%a, %b @@ -65,8 +63,7 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) { ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1} -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqw %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -88,8 +85,7 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) { ; KNL_X32-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL_X32-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1} -; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL_X32-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0 ; KNL_X32-NEXT: retl %c = and <8 x i1>%a, %b @@ -180,8 +176,7 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) { ; KNL-NEXT: Lcfi1: ; KNL-NEXT: .cfi_def_cfa_offset 16 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: callq _func16xi1 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero @@ -210,8 +205,7 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) { ; KNL_X32-NEXT: Lcfi1: ; KNL_X32-NEXT: .cfi_def_cfa_offset 16 ; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL_X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0 ; KNL_X32-NEXT: calll _func16xi1 ; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero @@ -285,8 +279,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { ; KNL-NEXT: movb $85, %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1} -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqw %zmm0, %xmm0 ; KNL-NEXT: popq %rax ; KNL-NEXT: retq @@ -322,8 +315,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) { ; KNL_X32-NEXT: movb $85, %al ; KNL_X32-NEXT: kmovw %eax, %k1 ; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1} -; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL_X32-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL_X32-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0 ; KNL_X32-NEXT: addl $12, %esp ; KNL_X32-NEXT: retl diff --git 
a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index c2eb19d16650..5e50a3aef2f2 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -740,8 +740,7 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { ; KNL: ## BB#0: ; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vcvtdq2ps %zmm0, %zmm0 ; KNL-NEXT: retq ; @@ -805,11 +804,10 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) { ; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; KNL-NEXT: vcmpltpd %zmm1, %zmm2, %k1 ; KNL-NEXT: vcmpltpd %zmm0, %zmm2, %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k2} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} ; KNL-NEXT: vpmovqd %zmm0, %ymm0 ; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovqd %zmm1, %ymm1 ; KNL-NEXT: vcvtdq2pd %ymm1, %zmm1 ; KNL-NEXT: retq @@ -834,8 +832,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ; KNL: ## BB#0: ; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqd %zmm0, %ymm0 ; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 ; KNL-NEXT: retq @@ -858,8 +855,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) { ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; KNL-NEXT: vcmpltps %zmm0, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqd %zmm0, %ymm0 ; KNL-NEXT: vcvtdq2ps %ymm0, %ymm0 ; KNL-NEXT: retq diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll index 32bd0804d637..03d6127ae5dc 100644 --- a/test/CodeGen/X86/avx512-ext.ll +++ b/test/CodeGen/X86/avx512-ext.ll @@ -345,9 +345,9 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 -; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq ; @@ -369,9 +369,9 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 -; KNL-NEXT: vpmovsxbd (%rdi), %ymm0 -; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: vpmovsxbd (%rdi), %ymm1 +; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## 
kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq ; @@ -704,9 +704,9 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 -; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq ; @@ -728,9 +728,9 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 -; KNL-NEXT: vpmovsxwd (%rdi), %ymm0 -; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: vpmovsxwd (%rdi), %ymm1 +; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq ; @@ -762,9 +762,9 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; KNL-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq ; @@ -1457,8 +1457,7 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { ; KNL-LABEL: sext_16i1_16i32: ; KNL: ## BB#0: ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; SKX-LABEL: sext_16i1_16i32: diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 26d14fa0840f..cb8ed0e59a3a 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -365,11 +365,10 @@ define i16 @test16(i1 *%addr, i16 %a) { ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %esi, %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z} -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15] -; KNL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; KNL-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -402,11 +401,10 @@ define i8 @test17(i1 *%addr, i8 %a) { ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %esi, %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z} -; KNL-NEXT: vmovdqa64 %zmm0, 
%zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7] -; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 +; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1242,30 +1240,29 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) ; KNL-NEXT: vpextrd $1, %xmm0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z} +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vmovd %xmm0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k2} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,8,2,3,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm2, %zmm3, %zmm4 -; KNL-NEXT: vpsllq $63, %zmm4, %zmm2 -; KNL-NEXT: vptestmq %zmm2, %zmm2, %k2 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z} -; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,8,3,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm3, %zmm2, %zmm4 -; KNL-NEXT: vpsllq $63, %zmm4, %zmm2 -; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] +; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 +; KNL-NEXT: vpsllq $63, %zmm3, %zmm1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k2 +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] +; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 +; KNL-NEXT: vpsllq $63, %zmm3, %zmm1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vpextrd $3, %xmm0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,8,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm0 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] +; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 +; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: retq @@ -1306,11 +1303,10 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) ; KNL-NEXT: vmovq %xmm0, %rax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z} -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 +; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index d48f63536e0e..b127585dc87b 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -344,8 +344,7 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; KNL-NEXT: LBB17_1: ; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 
; KNL-NEXT: LBB17_3: -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -382,8 +381,7 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; KNL-NEXT: LBB18_3: ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -472,8 +470,7 @@ define <16 x i1> @test15(i32 %x, i32 %y) { ; KNL-NEXT: movw $1, %cx ; KNL-NEXT: cmovgw %ax, %cx ; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -510,28 +507,27 @@ define <64 x i8> @test16(i64 %x) { ; KNL-NEXT: movl %edi, (%rsp) ; KNL-NEXT: shrq $32, %rdi ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 ; KNL-NEXT: kmovw (%rsp), %k1 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; KNL-NEXT: movl $1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1 -; KNL-NEXT: vpsllw $7, %ymm2, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vpmovdb %zmm2, %xmm2 +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq @@ -574,30 +570,29 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; KNL-NEXT: movl %edi, (%rsp) ; KNL-NEXT: shrq $32, %rdi ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; KNL-NEXT: kmovw (%rsp), %k1 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; KNL-NEXT: vpmovdb %zmm1, %xmm1 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: cmpl %edx, %esi ; 
KNL-NEXT: setg %al ; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vpmovdb %zmm2, %xmm2 +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq @@ -635,18 +630,17 @@ define <8 x i1> @test18(i8 %a, i16 %y) { ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftlw $6, %k2, %k2 ; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z} -; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k2} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,2,3,4,5,8,7] -; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; KNL-NEXT: vpsllq $63, %zmm3, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] +; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 +; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 ; KNL-NEXT: kshiftlw $7, %k0, %k0 ; KNL-NEXT: korw %k0, %k1, %k1 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqw %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -1387,8 +1381,7 @@ define <8 x i64> @load_8i1(<8 x i1>* %a) { ; KNL: ## BB#0: ; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; SKX-LABEL: load_8i1: @@ -1405,8 +1398,7 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) { ; KNL-LABEL: load_16i1: ; KNL: ## BB#0: ; KNL-NEXT: kmovw (%rdi), %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; SKX-LABEL: load_16i1: @@ -1424,8 +1416,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { ; KNL: ## BB#0: ; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: ## kill: %XMM0 %XMM0 %ZMM0 ; KNL-NEXT: retq ; @@ -1444,8 +1435,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) { ; KNL: ## BB#0: ; KNL-NEXT: movzbl (%rdi), %eax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqd %zmm0, %ymm0 ; KNL-NEXT: ## kill: %XMM0 %XMM0 %YMM0 ; KNL-NEXT: retq @@ -1465,10 +1455,9 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) { 
; KNL: ## BB#0: ; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: kmovw 2(%rdi), %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k2} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdw %zmm1, %ymm1 ; KNL-NEXT: retq ; @@ -1489,17 +1478,16 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) { ; KNL-NEXT: kmovw 2(%rdi), %k2 ; KNL-NEXT: kmovw 4(%rdi), %k3 ; KNL-NEXT: kmovw 6(%rdi), %k4 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k4} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z} +; KNL-NEXT: vpmovdb %zmm1, %xmm1 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z} +; KNL-NEXT: vpmovdb %zmm2, %xmm2 +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: load_64i1: diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll index 2a0de05608b4..9234ae838cff 100644 --- a/test/CodeGen/X86/avx512-mov.ll +++ b/test/CodeGen/X86/avx512-mov.ll @@ -313,7 +313,7 @@ define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07] +; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* @@ -327,7 +327,7 @@ define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07] +; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* @@ -369,7 +369,7 @@ define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* @@ -383,7 +383,7 @@ define <8 x 
i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07] +; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* @@ -426,7 +426,7 @@ define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* @@ -441,7 +441,7 @@ define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07] +; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* @@ -486,7 +486,7 @@ define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07] +; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* @@ -501,7 +501,7 @@ define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] ; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04] -; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07] +; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* diff --git a/test/CodeGen/X86/avx512-regcall-NoMask.ll b/test/CodeGen/X86/avx512-regcall-NoMask.ll index ce8fca036c91..a29c1e4628a1 100644 --- a/test/CodeGen/X86/avx512-regcall-NoMask.ll 
+++ b/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -325,11 +325,13 @@ define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) { } ; X32-LABEL: test_argRet128Vector: -; X32: vpblend{{.*}} %xmm0, %xmm1, %xmm0 +; X32: vmovdqa{{.*}} %xmm0, %xmm1 +; X32: vmovdqa{{.*}} %xmm1, %xmm0 ; X32: ret{{.*}} ; WIN64-LABEL: test_argRet128Vector: -; WIN64: vpblend{{.*}} %xmm0, %xmm1, %xmm0 +; WIN64: vmovdqa{{.*}} %xmm0, %xmm1 +; WIN64: vmovdqa{{.*}} %xmm1, %xmm0 ; WIN64: ret{{.*}} ; Test regcall when receiving/returning 128 bit vector @@ -341,13 +343,13 @@ define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %b) ; X32-LABEL: test_CallargRet128Vector: ; X32: vmov{{.*}} %xmm0, {{%xmm([0-7])}} ; X32: call{{.*}} {{.*}}test_argRet128Vector -; X32: vpblend{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0 +; X32: vmovdqa{{.*}} {{%xmm([0-7])}}, %xmm0 ; X32: ret{{.*}} ; WIN64-LABEL: test_CallargRet128Vector: ; WIN64: vmov{{.*}} %xmm0, {{%xmm([0-9]+)}} ; WIN64: call{{.*}} {{.*}}test_argRet128Vector -; WIN64: vpblend{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0 +; WIN64: vmovdqa{{.*}} {{%xmm([0-9]+)}}, %xmm0 ; WIN64: ret{{.*}} ; Test regcall when passing/retrieving 128 bit vector @@ -358,11 +360,13 @@ define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) { } ; X32-LABEL: test_argRet256Vector: -; X32: vpblend{{.*}} %ymm0, %ymm1, %ymm0 +; X32: vmovdqa{{.*}} %ymm0, %ymm1 +; X32: vmovdqa{{.*}} %ymm1, %ymm0 ; X32: ret{{.*}} ; WIN64-LABEL: test_argRet256Vector: -; WIN64: vpblend{{.*}} %ymm0, %ymm1, %ymm0 +; WIN64: vmovdqa{{.*}} %ymm0, %ymm1 +; WIN64: vmovdqa{{.*}} %ymm1, %ymm0 ; WIN64: ret{{.*}} ; Test regcall when receiving/returning 256 bit vector @@ -374,13 +378,13 @@ define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %b) ; X32-LABEL: test_CallargRet256Vector: ; X32: vmov{{.*}} %ymm0, %ymm1 ; X32: call{{.*}} {{.*}}test_argRet256Vector -; X32: vpblend{{.*}} %ymm1, %ymm0, %ymm0 +; X32: vmovdqa{{.*}} %ymm1, %ymm0 ; X32: ret{{.*}} ; WIN64-LABEL: test_CallargRet256Vector: ; WIN64: vmov{{.*}} %ymm0, %ymm1 ; WIN64: call{{.*}} {{.*}}test_argRet256Vector -; WIN64: vpblend{{.*}} %ymm1, %ymm0, %ymm0 +; WIN64: vmovdqa{{.*}} %ymm1, %ymm0 ; WIN64: ret{{.*}} ; Test regcall when passing/retrieving 256 bit vector @@ -391,11 +395,13 @@ define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i32> %a) { } ; X32-LABEL: test_argRet512Vector: -; X32: vpblend{{.*}} %zmm0, %zmm1, %zmm0 +; X32: vmovdqa{{.*}} %zmm0, %zmm1 +; X32: vmovdqa{{.*}} %zmm1, %zmm0 ; X32: ret{{.*}} ; WIN64-LABEL: test_argRet512Vector: -; WIN64: vpblend{{.*}} %zmm0, %zmm1, %zmm0 +; WIN64: vmovdqa{{.*}} %zmm0, %zmm1 +; WIN64: vmovdqa{{.*}} %zmm1, %zmm0 ; WIN64: ret{{.*}} ; Test regcall when receiving/returning 512 bit vector @@ -407,13 +413,13 @@ define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> ; X32-LABEL: test_CallargRet512Vector: ; X32: vmov{{.*}} %zmm0, %zmm1 ; X32: call{{.*}} {{.*}}test_argRet512Vector -; X32: vpblend{{.*}} %zmm1, %zmm0, %zmm0 +; X32: vmovdqa{{.*}} %zmm1, %zmm0 ; X32: ret{{.*}} ; WIN64-LABEL: test_CallargRet512Vector: ; WIN64: vmov{{.*}} %zmm0, %zmm1 ; WIN64: call{{.*}} {{.*}}test_argRet512Vector -; WIN64: vpblend{{.*}} %zmm1, %zmm0, %zmm0 +; WIN64: vmovdqa{{.*}} %zmm1, %zmm0 ; WIN64: ret{{.*}} ; Test regcall when passing/retrieving 512 bit vector diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll index 840239b9011a..1991ee4f3376 100644 --- a/test/CodeGen/X86/avx512-vbroadcast.ll +++
b/test/CodeGen/X86/avx512-vbroadcast.ll @@ -218,8 +218,7 @@ define <16 x i32> @test_vbroadcast() { ; ALL: # BB#0: # %entry ; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; ALL-NEXT: vcmpunordps %zmm0, %zmm0, %k1 -; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; ALL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; ALL-NEXT: knotw %k1, %k1 ; ALL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} ; ALL-NEXT: retq diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index bd269ea87a35..361ee1ddbf9d 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -6,7 +6,8 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = fcmp ole <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y @@ -17,7 +18,8 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { ; CHECK-LABEL: test2: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = fcmp ole <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y @@ -28,7 +30,8 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin ; CHECK-LABEL: test3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %yp, align 4 %mask = icmp eq <16 x i32> %x, %y @@ -40,7 +43,8 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) ; CHECK-LABEL: test4_unsigned: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp uge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y @@ -51,7 +55,8 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: test5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp eq <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y @@ -62,7 +67,8 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun ; CHECK-LABEL: test6_unsigned: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp ugt <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y @@ -81,7 +87,8 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) { ; SKX: ## BB#0: ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 -; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} +; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %mask = fcmp olt <4 x float> %a, zeroinitializer @@ -101,7 
+108,8 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { ; SKX: ## BB#0: ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 -; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} +; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} +; SKX-NEXT: vmovapd %xmm1, %xmm0 ; SKX-NEXT: retq %mask = fcmp olt <2 x double> %a, zeroinitializer %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b @@ -114,14 +122,15 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { ; KNL-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovdqa %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test9: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 -; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 ; SKX-NEXT: retq %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y @@ -134,14 +143,15 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { ; KNL-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 +; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test10: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 -; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 ; SKX-NEXT: retq %mask = fcmp oeq <8 x float> %x, %y @@ -658,9 +668,9 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b) define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) { ; CHECK-LABEL: test14: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 +; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 ; CHECK-NEXT: vpcmpgtd %zmm0, %zmm2, %k1 -; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %sub_r = sub <16 x i32> %a, %b %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a @@ -673,9 +683,9 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) { define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) { ; CHECK-LABEL: test15: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 +; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 ; CHECK-NEXT: vpcmpgtq %zmm0, %zmm2, %k1 -; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %sub_r = sub <8 x i64> %a, %b %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a @@ -689,7 +699,8 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind ; CHECK-LABEL: test16: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp sge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y @@ -700,7 +711,8 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-LABEL: test17: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 
%mask = icmp sgt <16 x i32> %x, %y @@ -712,7 +724,8 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-LABEL: test18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sle <16 x i32> %x, %y @@ -724,7 +737,8 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-LABEL: test19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp ule <16 x i32> %x, %y @@ -737,7 +751,8 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp eq <16 x i32> %x1, %y1 %mask0 = icmp eq <16 x i32> %x, %y @@ -751,7 +766,8 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} -; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %mask0 = icmp sle <8 x i64> %x, %y @@ -765,7 +781,8 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sgt <8 x i64> %x1, %y1 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4 @@ -780,7 +797,8 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 @@ -794,7 +812,8 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind { ; CHECK-LABEL: test24: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 @@ -808,7 +827,8 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind ; CHECK-LABEL: test25: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 @@ -823,7 +843,8 @@ define <16 x i32> @test26(<16 x 
i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -840,7 +861,8 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -858,8 +880,7 @@ define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1 ; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ; KNL-NEXT: kxnorw %k1, %k0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovqd %zmm0, %ymm0 ; KNL-NEXT: retq ; @@ -883,8 +904,7 @@ define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ; KNL-NEXT: kxorw %k1, %k0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; @@ -912,7 +932,8 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind { ; SKX-LABEL: test30: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 -; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovapd %ymm1, %ymm0 ; SKX-NEXT: retq %mask = fcmp oeq <4 x double> %x, %y @@ -930,7 +951,8 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp ; SKX-LABEL: test31: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 -; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} +; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} +; SKX-NEXT: vmovapd %xmm1, %xmm0 ; SKX-NEXT: retq %y = load <2 x double>, <2 x double>* %yp, align 4 @@ -949,7 +971,8 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp ; SKX-LABEL: test32: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 -; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovapd %ymm1, %ymm0 ; SKX-NEXT: retq %y = load <4 x double>, <4 x double>* %yp, align 4 @@ -962,7 +985,8 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp ; CHECK-LABEL: test33: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <8 x double>, <8 x double>* %yp, align 4 %mask = fcmp olt <8 x double> %x, %y @@ -980,7 +1004,8 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no ; SKX-LABEL: test34: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 -; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} +; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %y = load <4 x float>, <4 x float>* %yp, align 4 %mask = 
fcmp olt <4 x float> %x, %y @@ -995,14 +1020,15 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vmovups (%rdi), %ymm2 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 -; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 +; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test35: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 -; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 ; SKX-NEXT: retq %y = load <8 x float>, <8 x float>* %yp, align 4 @@ -1015,7 +1041,8 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp ; CHECK-LABEL: test36: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 -; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <16 x float>, <16 x float>* %yp, align 4 %mask = fcmp olt <16 x float> %x, %y @@ -1027,7 +1054,8 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou ; CHECK-LABEL: test37: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %a = load double, double* %ptr @@ -1050,7 +1078,8 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou ; SKX-LABEL: test38: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 -; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovapd %ymm1, %ymm0 ; SKX-NEXT: retq %a = load double, double* %ptr @@ -1073,7 +1102,8 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou ; SKX-LABEL: test39: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 -; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} +; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} +; SKX-NEXT: vmovapd %xmm1, %xmm0 ; SKX-NEXT: retq %a = load double, double* %ptr @@ -1090,7 +1120,8 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n ; CHECK-LABEL: test40: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq %a = load float, float* %ptr @@ -1109,14 +1140,15 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vbroadcastss (%rdi), %ymm2 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 -; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 +; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovaps %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test41: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 -; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} +; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 ; SKX-NEXT: retq %a = load float, float* %ptr @@ -1139,7 +1171,8 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun ; SKX-LABEL: test42: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 -; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} +; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} +; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq %a = load 
float, float* %ptr @@ -1158,7 +1191,8 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} -; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; KNL-NEXT: vmovapd %zmm1, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test43: @@ -1166,7 +1200,8 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x ; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ; SKX-NEXT: vpmovw2m %xmm2, %k1 ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} -; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; SKX-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; SKX-NEXT: vmovapd %zmm1, %zmm0 ; SKX-NEXT: retq %a = load double, double* %ptr diff --git a/test/CodeGen/X86/avx512bw-mov.ll b/test/CodeGen/X86/avx512bw-mov.ll index c58b3cc8c3cd..11bb431414a0 100644 --- a/test/CodeGen/X86/avx512bw-mov.ll +++ b/test/CodeGen/X86/avx512bw-mov.ll @@ -26,7 +26,7 @@ define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; CHECK-NEXT: vpcmpneqb %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpblendmb (%rdi), %zmm0, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ne <64 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <64 x i8>* @@ -74,7 +74,7 @@ define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; CHECK-NEXT: vpcmpneqw %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpblendmw (%rdi), %zmm0, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ne <32 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <32 x i16>* diff --git a/test/CodeGen/X86/avx512bw-vec-cmp.ll b/test/CodeGen/X86/avx512bw-vec-cmp.ll index 016837e61307..34432468921b 100644 --- a/test/CodeGen/X86/avx512bw-vec-cmp.ll +++ b/test/CodeGen/X86/avx512bw-vec-cmp.ll @@ -5,7 +5,8 @@ define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp eq <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y @@ -16,7 +17,8 @@ define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind { ; CHECK-LABEL: test2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp sgt <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y @@ -27,7 +29,8 @@ define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind ; CHECK-LABEL: test3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k1 -; CHECK-NEXT: vpblendmw %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp sge <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y @@ -38,7 +41,8 @@ define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind { ; CHECK-LABEL: test4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; 
CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask = icmp ugt <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y @@ -49,7 +53,8 @@ define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwin ; CHECK-LABEL: test5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %yp, align 4 %mask = icmp eq <32 x i16> %x, %y @@ -61,7 +66,8 @@ define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun ; CHECK-LABEL: test6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp sgt <32 x i16> %x, %y @@ -73,7 +79,8 @@ define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun ; CHECK-LABEL: test7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp sle <32 x i16> %x, %y @@ -85,7 +92,8 @@ define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun ; CHECK-LABEL: test8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 -; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp ule <32 x i16> %x, %y @@ -98,7 +106,8 @@ define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqw %zmm3, %zmm2, %k1 {%k1} -; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp eq <32 x i16> %x1, %y1 %mask0 = icmp eq <32 x i16> %x, %y @@ -112,7 +121,8 @@ define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleb %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpleb %zmm2, %zmm3, %k1 {%k1} -; CHECK-NEXT: vpblendmb %zmm0, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <64 x i8> %x1, %y1 %mask0 = icmp sle <64 x i8> %x, %y @@ -126,7 +136,8 @@ define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpcmpgtb (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sgt <64 x i8> %x1, %y1 %y = load <64 x i8>, <64 x i8>* %y.ptr, align 4 @@ -141,7 +152,8 @@ define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <32 x i16> %x1, %y1 %y = load <32 x i16>, <32 x i16>* 
%y.ptr, align 4 diff --git a/test/CodeGen/X86/avx512bwvl-mov.ll b/test/CodeGen/X86/avx512bwvl-mov.ll index 209f18ba7f9c..3f92641a3e16 100644 --- a/test/CodeGen/X86/avx512bwvl-mov.ll +++ b/test/CodeGen/X86/avx512bwvl-mov.ll @@ -26,7 +26,7 @@ define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04] -; CHECK-NEXT: vpblendmb (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x66,0x07] +; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <32 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <32 x i8>* @@ -74,7 +74,7 @@ define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04] -; CHECK-NEXT: vpblendmw (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x66,0x07] +; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i16>* @@ -122,7 +122,7 @@ define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04] -; CHECK-NEXT: vpblendmb (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x66,0x07] +; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i8>* @@ -170,7 +170,7 @@ define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04] -; CHECK-NEXT: vpblendmw (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x66,0x07] +; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i16>* diff --git a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll index 17e581bbb501..3e7f0acae78b 100644 --- a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll +++ b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll @@ -5,7 +5,8 @@ define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind { ; CHECK-LABEL: test256_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1 -; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask = icmp eq <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y @@ -16,7 +17,8 @@ define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind ; CHECK-LABEL: test256_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k1 -; CHECK-NEXT: vpblendmb 
%ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %mask = icmp sgt <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1 @@ -27,7 +29,8 @@ define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounw ; CHECK-LABEL: test256_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k1 -; CHECK-NEXT: vpblendmw %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm2, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask = icmp sge <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y @@ -38,7 +41,8 @@ define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind ; CHECK-LABEL: test256_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k1 -; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %mask = icmp ugt <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1 @@ -49,7 +53,8 @@ define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nou ; CHECK-LABEL: test256_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %yp, align 4 %mask = icmp eq <16 x i16> %x, %y @@ -61,7 +66,8 @@ define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) ; CHECK-LABEL: test256_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp sgt <16 x i16> %x, %y @@ -73,7 +79,8 @@ define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) ; CHECK-LABEL: test256_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp sle <16 x i16> %x, %y @@ -85,7 +92,8 @@ define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) ; CHECK-LABEL: test256_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp ule <16 x i16> %x, %y @@ -98,7 +106,8 @@ define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpeqw %ymm3, %ymm2, %k1 {%k1} -; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp eq <16 x i16> %x1, %y1 %mask0 = icmp eq <16 x i16> %x, %y @@ -112,7 +121,8 @@ define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpleb %ymm2, %ymm3, %k1 {%k1} -; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sge <32 
x i8> %x1, %y1 %mask0 = icmp sle <32 x i8> %x, %y @@ -126,7 +136,8 @@ define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sgt <32 x i8> %x1, %y1 %y = load <32 x i8>, <32 x i8>* %y.ptr, align 4 @@ -141,7 +152,8 @@ define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i16> %x1, %y1 %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 @@ -155,7 +167,8 @@ define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-LABEL: test128_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask = icmp eq <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y @@ -166,7 +179,8 @@ define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind ; CHECK-LABEL: test128_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %mask = icmp sgt <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1 @@ -177,7 +191,8 @@ define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind ; CHECK-LABEL: test128_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k1 -; CHECK-NEXT: vpblendmw %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm2, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask = icmp sge <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y @@ -188,7 +203,8 @@ define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind ; CHECK-LABEL: test128_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %mask = icmp ugt <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1 @@ -199,7 +215,8 @@ define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwin ; CHECK-LABEL: test128_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %yp, align 4 %mask = icmp eq <8 x i16> %x, %y @@ -211,7 +228,8 @@ define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun ; CHECK-LABEL: test128_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp sgt <8 x i16> %x, %y @@ -223,7 +241,8 @@ define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x 
i16>* %y.ptr) noun ; CHECK-LABEL: test128_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp sle <8 x i16> %x, %y @@ -235,7 +254,8 @@ define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun ; CHECK-LABEL: test128_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp ule <8 x i16> %x, %y @@ -248,7 +268,8 @@ define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpeqw %xmm3, %xmm2, %k1 {%k1} -; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp eq <8 x i16> %x1, %y1 %mask0 = icmp eq <8 x i16> %x, %y @@ -262,7 +283,8 @@ define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpleb %xmm2, %xmm3, %k1 {%k1} -; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i8> %x1, %y1 %mask0 = icmp sle <16 x i8> %x, %y @@ -276,7 +298,8 @@ define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sgt <16 x i8> %x1, %y1 %y = load <16 x i8>, <16 x i8>* %y.ptr, align 4 @@ -291,7 +314,8 @@ define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmplew %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i16> %x1, %y1 %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 diff --git a/test/CodeGen/X86/avx512vl-mov.ll b/test/CodeGen/X86/avx512vl-mov.ll index e37fd76377e3..af449d6628c4 100644 --- a/test/CodeGen/X86/avx512vl-mov.ll +++ b/test/CodeGen/X86/avx512vl-mov.ll @@ -166,7 +166,7 @@ define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] +; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* @@ -180,7 +180,7 @@ define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: 
vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] +; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* @@ -222,7 +222,7 @@ define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] +; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* @@ -236,7 +236,7 @@ define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] +; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* @@ -279,7 +279,7 @@ define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] -; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* @@ -294,7 +294,7 @@ define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] -; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] +; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* @@ -338,7 +338,7 @@ define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] +; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: 
[0x62,0xf1,0xfd,0x29,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* @@ -352,7 +352,7 @@ define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] -; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] +; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* @@ -554,7 +554,7 @@ define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] +; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* @@ -568,7 +568,7 @@ define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] +; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* @@ -610,7 +610,7 @@ define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] +; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* @@ -624,7 +624,7 @@ define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] +; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* @@ -666,7 +666,7 @@ define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## 
encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* @@ -680,7 +680,7 @@ define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] +; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* @@ -722,7 +722,7 @@ define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] +; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* @@ -736,7 +736,7 @@ define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1 ; CHECK: ## BB#0: ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] -; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] +; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll index e0acf2be653e..25b9cc79096f 100644 --- a/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -5,7 +5,8 @@ define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind { ; CHECK-LABEL: test256_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask = icmp eq <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y @@ -16,7 +17,8 @@ define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind ; CHECK-LABEL: test256_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask = icmp sgt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -27,7 +29,8 @@ define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind ; CHECK-LABEL: test256_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1 -; CHECK-NEXT: vpblendmd %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 
%ymm2, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask = icmp sge <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y @@ -38,7 +41,8 @@ define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind ; CHECK-LABEL: test256_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 -; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask = icmp ugt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -49,7 +53,8 @@ define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwin ; CHECK-LABEL: test256_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %x, %y @@ -61,7 +66,8 @@ define <8 x i32> @test256_5b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_5b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %y, %x @@ -73,7 +79,8 @@ define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sgt <8 x i32> %x, %y @@ -85,7 +92,8 @@ define <8 x i32> @test256_6b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou ; CHECK-LABEL: test256_6b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp slt <8 x i32> %y, %x @@ -97,7 +105,8 @@ define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sle <8 x i32> %x, %y @@ -109,7 +118,8 @@ define <8 x i32> @test256_7b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou ; CHECK-LABEL: test256_7b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sge <8 x i32> %y, %x @@ -121,7 +131,8 @@ define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun ; CHECK-LABEL: test256_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp ule <8 x i32> %x, %y @@ -133,7 +144,8 @@ define <8 x i32> 
@test256_8b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou ; CHECK-LABEL: test256_8b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp uge <8 x i32> %y, %x @@ -146,7 +158,8 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1} -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp eq <8 x i32> %x1, %y1 %mask0 = icmp eq <8 x i32> %x, %y @@ -160,7 +173,8 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1 ; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1} -; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %mask0 = icmp sle <4 x i64> %x, %y @@ -174,7 +188,8 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sgt <4 x i64> %x1, %y1 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4 @@ -189,7 +204,8 @@ define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1} -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 @@ -203,7 +219,8 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind ; CHECK-LABEL: test256_13: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 @@ -217,7 +234,8 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind ; CHECK-LABEL: test256_14: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 @@ -232,7 +250,8 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1} -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -249,7 +268,8 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64 ; CHECK: ## BB#0: ; 
CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1} -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -265,7 +285,8 @@ define <8 x i32> @test256_17(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_17: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp ne <8 x i32> %x, %y @@ -277,7 +298,8 @@ define <8 x i32> @test256_18(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp ne <8 x i32> %y, %x @@ -289,7 +311,8 @@ define <8 x i32> @test256_19(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp uge <8 x i32> %x, %y @@ -301,7 +324,8 @@ define <8 x i32> @test256_20(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi ; CHECK-LABEL: test256_20: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp uge <8 x i32> %y, %x @@ -313,7 +337,8 @@ define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-LABEL: test128_1: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask = icmp eq <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y @@ -324,7 +349,8 @@ define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind ; CHECK-LABEL: test128_2: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask = icmp sgt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -335,7 +361,8 @@ define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind ; CHECK-LABEL: test128_3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1 -; CHECK-NEXT: vpblendmd %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask = icmp sge <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y @@ -346,7 +373,8 @@ define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind ; CHECK-LABEL: test128_4: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1} +; CHECK-NEXT: 
vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask = icmp ugt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -357,7 +385,8 @@ define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwin ; CHECK-LABEL: test128_5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %x, %y @@ -369,7 +398,8 @@ define <4 x i32> @test128_5b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwi ; CHECK-LABEL: test128_5b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %y, %x @@ -381,7 +411,8 @@ define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_6: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sgt <4 x i32> %x, %y @@ -393,7 +424,8 @@ define <4 x i32> @test128_6b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_6b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp slt <4 x i32> %y, %x @@ -405,7 +437,8 @@ define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_7: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sle <4 x i32> %x, %y @@ -417,7 +450,8 @@ define <4 x i32> @test128_7b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_7b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sge <4 x i32> %y, %x @@ -429,7 +463,8 @@ define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun ; CHECK-LABEL: test128_8: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ule <4 x i32> %x, %y @@ -441,7 +476,8 @@ define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_8b: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x @@ -454,7 +490,8 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> 
%y, <4 x i32> %x1, <4 x i32> ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1} -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp eq <4 x i32> %x1, %y1 %mask0 = icmp eq <4 x i32> %x, %y @@ -468,7 +505,8 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 ; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1} -; CHECK-NEXT: vpblendmq %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %mask0 = icmp sle <2 x i64> %x, %y @@ -482,7 +520,8 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sgt <2 x i64> %x1, %y1 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4 @@ -497,7 +536,8 @@ define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 @@ -511,7 +551,8 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind ; CHECK-LABEL: test128_13: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1 -; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 @@ -525,7 +566,8 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind ; CHECK-LABEL: test128_14: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 @@ -540,7 +582,8 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -557,7 +600,8 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1} -; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -573,7 +617,8 @@ define <4 x i32> @test128_17(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_17: ; CHECK: 
## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %x, %y @@ -585,7 +630,8 @@ define <4 x i32> @test128_18(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %y, %x @@ -597,7 +643,8 @@ define <4 x i32> @test128_19(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %x, %y @@ -609,7 +656,8 @@ define <4 x i32> @test128_20(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou ; CHECK-LABEL: test128_20: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 -; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 8e9bc8b5af4b..0060539c691f 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -157,16 +157,12 @@ define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { ret i8 %d } -; FIXME: The 'not' is redundant.
- define i32 @smin(i32 %x) { ; CHECK-LABEL: smin: ; CHECK: ## BB#0: -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: notl %ecx ; CHECK-NEXT: xorl $-1, %edi ; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovsl %ecx, %eax +; CHECK-NEXT: cmovsl %edi, %eax ; CHECK-NEXT: retq %not_x = xor i32 %x, -1 %1 = icmp slt i32 %not_x, -1 diff --git a/test/CodeGen/X86/fma-fneg-combine.ll b/test/CodeGen/X86/fma-fneg-combine.ll index 5636a5bcd73e..5329f5b216a4 100644 --- a/test/CodeGen/X86/fma-fneg-combine.ll +++ b/test/CodeGen/X86/fma-fneg-combine.ll @@ -222,9 +222,9 @@ define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i ; SKX-NEXT: kmovw %edi, %k1 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3 ; SKX-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1 -; SKX-NEXT: vblendmps %zmm1, %zmm3, %zmm1 {%k1} -; SKX-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vmovaps %zmm1, %zmm3 {%k1} +; SKX-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1} +; SKX-NEXT: vmovaps %zmm3, %zmm0 ; SKX-NEXT: retq ; ; KNL-LABEL: test15: @@ -232,9 +232,9 @@ ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3 ; KNL-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1 -; KNL-NEXT: vblendmps %zmm1, %zmm3, %zmm1 {%k1} -; KNL-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm1 {%k1} -; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: vmovaps %zmm1, %zmm3 {%k1} +; KNL-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1} +; KNL-NEXT: vmovaps %zmm3, %zmm0 ; KNL-NEXT: retq entry: %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a diff --git a/test/CodeGen/X86/fmaddsub-combine.ll b/test/CodeGen/X86/fmaddsub-combine.ll new file mode 100644 index 000000000000..f3b13cd053b4 --- /dev/null +++ b/test/CodeGen/X86/fmaddsub-combine.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s + +; This test checks the fusing of MUL + ADDSUB to FMADDSUB.
+ +define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 { +; FMA3-LABEL: mul_addsub_pd128: +; FMA3: # BB#0: # %entry +; FMA3-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 +; FMA3-NEXT: retq +; +; FMA4-LABEL: mul_addsub_pd128: +; FMA4: # BB#0: # %entry +; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +entry: + %AB = fmul <2 x double> %A, %B + %Sub = fsub <2 x double> %AB, %C + %Add = fadd <2 x double> %AB, %C + %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3> + ret <2 x double> %Addsub +} + +define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 { +; FMA3-LABEL: mul_addsub_ps128: +; FMA3: # BB#0: # %entry +; FMA3-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 +; FMA3-NEXT: retq +; +; FMA4-LABEL: mul_addsub_ps128: +; FMA4: # BB#0: # %entry +; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 +; FMA4-NEXT: retq +entry: + %AB = fmul <4 x float> %A, %B + %Sub = fsub <4 x float> %AB, %C + %Add = fadd <4 x float> %AB, %C + %Addsub = shufflevector <4 x float> %Sub, <4 x float> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7> + ret <4 x float> %Addsub +} + +define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 { +; FMA3-LABEL: mul_addsub_pd256: +; FMA3: # BB#0: # %entry +; FMA3-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 +; FMA3-NEXT: retq +; +; FMA4-LABEL: mul_addsub_pd256: +; FMA4: # BB#0: # %entry +; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: retq +entry: + %AB = fmul <4 x double> %A, %B + %Sub = fsub <4 x double> %AB, %C + %Add = fadd <4 x double> %AB, %C + %Addsub = shufflevector <4 x double> %Sub, <4 x double> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7> + ret <4 x double> %Addsub +} + +define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 { +; FMA3-LABEL: mul_addsub_ps256: +; FMA3: # BB#0: # %entry +; FMA3-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 +; FMA3-NEXT: retq +; +; FMA4-LABEL: mul_addsub_ps256: +; FMA4: # BB#0: # %entry +; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 +; FMA4-NEXT: retq +entry: + %AB = fmul <8 x float> %A, %B + %Sub = fsub <8 x float> %AB, %C + %Add = fadd <8 x float> %AB, %C + %Addsub = shufflevector <8 x float> %Sub, <8 x float> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> + ret <8 x float> %Addsub +} + +define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 { +; FMA3_256-LABEL: mul_addsub_pd512: +; FMA3_256: # BB#0: # %entry +; FMA3_256-NEXT: vfmaddsub213pd %ymm4, %ymm2, %ymm0 +; FMA3_256-NEXT: vfmaddsub213pd %ymm5, %ymm3, %ymm1 +; FMA3_256-NEXT: retq +; +; FMA3_512-LABEL: mul_addsub_pd512: +; FMA3_512: # BB#0: # %entry +; FMA3_512-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 +; FMA3_512-NEXT: retq +; +; FMA4-LABEL: mul_addsub_pd512: +; FMA4: # BB#0: # %entry +; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: retq +entry: + %AB = fmul <8 x double> %A, %B + %Sub = fsub <8 x double> %AB, %C + %Add = fadd <8 x double> %AB, %C + %Addsub = shufflevector <8 x double> %Sub, <8 x double> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> + ret <8 x double> %Addsub +} + +define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 { +; FMA3_256-LABEL: mul_addsub_ps512: +; FMA3_256: # BB#0: # %entry +; FMA3_256-NEXT: vfmaddsub213ps %ymm4, %ymm2, %ymm0 +; FMA3_256-NEXT: vfmaddsub213ps %ymm5, %ymm3, %ymm1 +; FMA3_256-NEXT: retq +; +; FMA3_512-LABEL: mul_addsub_ps512: +; FMA3_512: # BB#0: # %entry +; FMA3_512-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 +; FMA3_512-NEXT: retq +;
+; FMA4-LABEL: mul_addsub_ps512: +; FMA4: # BB#0: # %entry +; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0 +; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1 +; FMA4-NEXT: retq +entry: + %AB = fmul <16 x float> %A, %B + %Sub = fsub <16 x float> %AB, %C + %Add = fadd <16 x float> %AB, %C + %Addsub = shufflevector <16 x float> %Sub, <16 x float> %Add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> + ret <16 x float> %Addsub +} + +attributes #0 = { nounwind "unsafe-fp-math"="true" } diff --git a/test/CodeGen/X86/sse-fsignum.ll b/test/CodeGen/X86/sse-fsignum.ll index 7159d4c87174..32594a27698d 100644 --- a/test/CodeGen/X86/sse-fsignum.ll +++ b/test/CodeGen/X86/sse-fsignum.ll @@ -93,15 +93,14 @@ define void @signum32b(<8 x float>*) { ; AVX512F-NEXT: vmovaps (%rdi), %ymm0 ; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 -; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3 {%k1} {z} -; AVX512F-NEXT: vpmovqd %zmm3, %ymm3 -; AVX512F-NEXT: vcvtdq2ps %ymm3, %ymm3 +; AVX512F-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpmovqd %zmm2, %ymm2 +; AVX512F-NEXT: vcvtdq2ps %ymm2, %ymm2 ; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 -; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 ; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0 -; AVX512F-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; AVX512F-NEXT: vsubps %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: vmovaps %ymm0, (%rdi) ; AVX512F-NEXT: retq entry: diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll index abe3da752874..c34f333ef785 100644 --- a/test/CodeGen/X86/vector-compare-results.ll +++ b/test/CodeGen/X86/vector-compare-results.ll @@ -4,6 +4,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW ; ; 128-bit vector comparisons @@ -308,12 +310,26 @@ define <16 x i1> @test_cmp_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v16i16: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v16i16: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v16i16: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v16i16: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512BW-NEXT: retq %1 = icmp sgt <16 x i16> %a0, %a1 ret <16 x i1> %1 } @@ -589,13 +605,26 @@ define <8 x i1>
@test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v8f64: -; AVX512: # BB#0: -; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v8f64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v8f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v8f64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = fcmp ogt <8 x double> %a0, %a1 ret <8 x i1> %1 } @@ -636,13 +665,26 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v16f32: -; AVX512: # BB#0: -; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v16f32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v16f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v16f32: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = fcmp ogt <16 x float> %a0, %a1 ret <16 x i1> %1 } @@ -734,13 +776,26 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v8i64: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v8i64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v8i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v8i64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <8 x i64> %a0, %a1 ret <8 x i1> %1 } @@ -784,13 +839,26 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v16i32: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; AVX512-NEXT: 
vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v16i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v16i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v16i32: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <16 x i32> %a0, %a1 ret <16 x i1> %1 } @@ -1045,16 +1113,35 @@ define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v32i16: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; AVX512-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v32i16: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v32i16: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v32i16: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <32 x i16> %a0, %a1 ret <32 x i1> %1 } @@ -1874,15 +1961,31 @@ define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v64i8: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 -; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vextracti128 $1, %ymm4, %xmm3 -; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; AVX512-NEXT: vmovdqa %xmm4, %xmm2 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v64i8: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 +; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm3 +; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512F-NEXT: vmovdqa %xmm4, %xmm2 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v64i8: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 +; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vextracti128 $1, %ymm4,
%xmm3 +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm2 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v64i8: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <64 x i8> %a0, %a1 ret <64 x i1> %1 } @@ -1957,120 +2060,350 @@ define <16 x i1> @test_cmp_v16f64(<16 x double> %a0, <16 x double> %a1) nounwind ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v16f64: -; AVX512: # BB#0: -; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm4 -; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm5 -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movq $-1, %rcx -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm6 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm5 -; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm6 -; AVX512-NEXT: vucomisd %xmm5, %xmm6 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm7 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] -; AVX512-NEXT: vucomisd %xmm5, %xmm6 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] -; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 -; AVX512-NEXT: vextractf32x4 $1, %zmm2, %xmm5 -; AVX512-NEXT: vextractf32x4 $1, %zmm0, %xmm6 -; AVX512-NEXT: vucomisd %xmm5, %xmm6 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm7 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] -; AVX512-NEXT: vucomisd %xmm5, %xmm6 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] -; AVX512-NEXT: vucomisd %xmm2, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm6 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vucomisd %xmm2, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0] -; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vextractf32x4 $3, %zmm3, %xmm2 -; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm4 -; AVX512-NEXT: vucomisd %xmm2, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vucomisd %xmm2, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm2 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm3, %xmm4 -; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm5 -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -;
AVX512-NEXT: vmovq %rdx, %xmm6 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 -; AVX512-NEXT: vextractf32x4 $1, %zmm3, %xmm4 -; AVX512-NEXT: vextractf32x4 $1, %zmm1, %xmm5 -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm6 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] -; AVX512-NEXT: vucomisd %xmm3, %xmm1 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512-NEXT: vucomisd %xmm3, %xmm1 -; AVX512-NEXT: cmovaq %rcx, %rax -; AVX512-NEXT: vmovq %rax, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] -; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v16f64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextractf32x4 $3, %zmm2, %xmm4 +; AVX512F-NEXT: vextractf32x4 $3, %zmm0, %xmm5 +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movq $-1, %rcx +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm6 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512F-NEXT: vextractf32x4 $2, %zmm2, %xmm5 +; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm6 +; AVX512F-NEXT: vucomisd %xmm5, %xmm6 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm7 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] +; AVX512F-NEXT: vucomisd %xmm5, %xmm6 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512F-NEXT: vextractf32x4 $1, %zmm2, %xmm5 +; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm6 +; AVX512F-NEXT: vucomisd %xmm5, %xmm6 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm7 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] +; AVX512F-NEXT: vucomisd %xmm5, %xmm6 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] +; AVX512F-NEXT: vucomisd %xmm2, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm6 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 
= xmm2[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512F-NEXT: vucomisd %xmm2, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0] +; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vextractf32x4 $3, %zmm3, %xmm2 +; AVX512F-NEXT: vextractf32x4 $3, %zmm1, %xmm4 +; AVX512F-NEXT: vucomisd %xmm2, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vucomisd %xmm2, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm2 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512F-NEXT: vextractf32x4 $2, %zmm3, %xmm4 +; AVX512F-NEXT: vextractf32x4 $2, %zmm1, %xmm5 +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm6 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512F-NEXT: vextractf32x4 $1, %zmm3, %xmm4 +; AVX512F-NEXT: vextractf32x4 $1, %zmm1, %xmm5 +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm6 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512F-NEXT: vucomisd %xmm3, %xmm1 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512F-NEXT: vucomisd %xmm3, %xmm1 +; AVX512F-NEXT: cmovaq %rcx, %rax +; AVX512F-NEXT: vmovq %rax, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v16f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm2, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm0, %xmm5 +; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movq $-1, %rcx +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm6 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm2, %xmm5 +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm0, %xmm6 +; 
AVX512DQ-NEXT: vucomisd %xmm5, %xmm6 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm7 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm2, %xmm5 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm0, %xmm6 +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm7 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm6 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm3, %xmm2 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm1, %xmm4 +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm3, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm1, %xmm5 +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm6 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm3, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm1, %xmm5 +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm6 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512DQ-NEXT: vucomisd %xmm3, %xmm1 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: 
vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vucomisd %xmm3, %xmm1 +; AVX512DQ-NEXT: cmovaq %rcx, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v16f64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vextractf32x4 $3, %zmm2, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm5 +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movq $-1, %rcx +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm6 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm2, %xmm5 +; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm6 +; AVX512BW-NEXT: vucomisd %xmm5, %xmm6 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm7 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] +; AVX512BW-NEXT: vucomisd %xmm5, %xmm6 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm2, %xmm5 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm0, %xmm6 +; AVX512BW-NEXT: vucomisd %xmm5, %xmm6 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm7 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0] +; AVX512BW-NEXT: vucomisd %xmm5, %xmm6 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0] +; AVX512BW-NEXT: vucomisd %xmm2, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm6 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512BW-NEXT: vucomisd %xmm2, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm3, %xmm2 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm1, %xmm4 +; AVX512BW-NEXT: vucomisd %xmm2, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vucomisd %xmm2, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; 
AVX512BW-NEXT: vextractf32x4 $2, %zmm3, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $2, %zmm1, %xmm5 +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm6 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm3, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm1, %xmm5 +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm6 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0] +; AVX512BW-NEXT: vucomisd %xmm3, %xmm1 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512BW-NEXT: vucomisd %xmm3, %xmm1 +; AVX512BW-NEXT: cmovaq %rcx, %rax +; AVX512BW-NEXT: vmovq %rax, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = fcmp ogt <16 x double> %a0, %a1 ret <16 x i1> %1 } @@ -2416,207 +2749,612 @@ define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> %a1) nounwind { ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v32f32: -; AVX512: # BB#0: -; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm4 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3] -; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm6 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3] -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vucomiss %xmm5, %xmm7 -; AVX512-NEXT: movl $-1, %ecx -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmoval %ecx, %edx -; AVX512-NEXT: vucomiss %xmm4, %xmm6 -; AVX512-NEXT: movl $0, %esi -; AVX512-NEXT: cmoval %ecx, %esi -; AVX512-NEXT: vmovd %esi, %xmm5 -; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm8 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0] -; AVX512-NEXT: vucomiss %xmm7, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmoval %ecx, %edx -; AVX512-NEXT: vpinsrd $2, %edx, %xmm8, %xmm5 -; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[3,1,2,3] -; AVX512-NEXT: vucomiss %xmm4, %xmm6 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmoval %ecx, %edx -; AVX512-NEXT: vpinsrd $3, %edx, %xmm5, %xmm8 -; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm5 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3] -; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm7 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3] -; AVX512-NEXT: vucomiss %xmm6, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmoval %ecx, %edx -; AVX512-NEXT: vucomiss %xmm5, %xmm7 
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm4
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
-; AVX512-NEXT: vucomiss %xmm6, %xmm4
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
-; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm5, %xmm6
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vinserti128 $1, %xmm8, %ymm4, %ymm8
-; AVX512-NEXT: vextractf32x4 $1, %zmm2, %xmm5
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; AVX512-NEXT: vextractf32x4 $1, %zmm0, %xmm7
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
-; AVX512-NEXT: vucomiss %xmm6, %xmm4
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vucomiss %xmm5, %xmm7
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm4
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
-; AVX512-NEXT: vucomiss %xmm6, %xmm4
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
-; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm5, %xmm6
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
-; AVX512-NEXT: vucomiss %xmm5, %xmm6
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vucomiss %xmm2, %xmm0
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm5
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
-; AVX512-NEXT: vucomiss %xmm6, %xmm7
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm2, %xmm0
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
-; AVX512-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm8
-; AVX512-NEXT: vextractf32x4 $3, %zmm3, %xmm2
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
-; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm5
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; AVX512-NEXT: vucomiss %xmm4, %xmm6
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vucomiss %xmm2, %xmm5
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm4
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
-; AVX512-NEXT: vucomiss %xmm6, %xmm7
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm2, %xmm5
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
-; AVX512-NEXT: vextractf32x4 $2, %zmm3, %xmm4
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
-; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm6
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
-; AVX512-NEXT: vucomiss %xmm5, %xmm7
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vucomiss %xmm4, %xmm6
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm5
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm6[1,0]
-; AVX512-NEXT: vucomiss %xmm7, %xmm0
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm5, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm6[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm4, %xmm5
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512-NEXT: vextractf32x4 $1, %zmm3, %xmm0
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512-NEXT: vextractf32x4 $1, %zmm1, %xmm5
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; AVX512-NEXT: vucomiss %xmm4, %xmm6
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vucomiss %xmm0, %xmm5
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm4
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
-; AVX512-NEXT: vucomiss %xmm6, %xmm7
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm0, %xmm5
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; AVX512-NEXT: vucomiss %xmm4, %xmm5
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vucomiss %xmm3, %xmm1
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmoval %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm4
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm3[1,0]
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm1[1,0]
-; AVX512-NEXT: vucomiss %xmm5, %xmm6
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmoval %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
-; AVX512-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3]
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; AVX512-NEXT: vucomiss %xmm3, %xmm1
-; AVX512-NEXT: cmoval %ecx, %eax
-; AVX512-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
-; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: test_cmp_v32f32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextractf32x4 $3, %zmm2, %xmm4
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
+; AVX512F-NEXT: vextractf32x4 $3, %zmm0, %xmm6
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: vucomiss %xmm5, %xmm7
+; AVX512F-NEXT: movl $-1, %ecx
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm4, %xmm6
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm5
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm5, %xmm8
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0]
+; AVX512F-NEXT: vucomiss %xmm7, %xmm5
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm8, %xmm5
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm4, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm5, %xmm8
+; AVX512F-NEXT: vextractf32x4 $2, %zmm2, %xmm5
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm7
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm4
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm5, %xmm7
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm4
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm4
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm5, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vinserti128 $1, %xmm8, %ymm4, %ymm8
+; AVX512F-NEXT: vextractf32x4 $1, %zmm2, %xmm5
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm7
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm4
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm5, %xmm7
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm4
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm4
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm5, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm5, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm2, %xmm0
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm5
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm7
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm2, %xmm0
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm8
+; AVX512F-NEXT: vextractf32x4 $3, %zmm3, %xmm2
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; AVX512F-NEXT: vextractf32x4 $3, %zmm1, %xmm5
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm4, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm2, %xmm5
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm4
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm7
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm2, %xmm5
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
+; AVX512F-NEXT: vextractf32x4 $2, %zmm3, %xmm4
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
+; AVX512F-NEXT: vextractf32x4 $2, %zmm1, %xmm6
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm5, %xmm7
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm4, %xmm6
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm5
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm6[1,0]
+; AVX512F-NEXT: vucomiss %xmm7, %xmm0
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm5, %xmm0
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm5 = xmm6[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm4, %xmm5
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512F-NEXT: vextractf32x4 $1, %zmm3, %xmm0
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vextractf32x4 $1, %zmm1, %xmm5
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm4, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm0, %xmm5
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm4
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512F-NEXT: vucomiss %xmm6, %xmm7
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm0, %xmm5
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; AVX512F-NEXT: vucomiss %xmm4, %xmm5
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vucomiss %xmm3, %xmm1
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmoval %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm4
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm3[1,0]
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm1[1,0]
+; AVX512F-NEXT: vucomiss %xmm5, %xmm6
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmoval %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX512F-NEXT: vucomiss %xmm3, %xmm1
+; AVX512F-NEXT: cmoval %ecx, %eax
+; AVX512F-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: test_cmp_v32f32:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm2, %xmm4
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm0, %xmm6
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
+; AVX512DQ-NEXT: xorl %eax, %eax
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm7
+; AVX512DQ-NEXT: movl $-1, %ecx
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm6
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm5
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm5, %xmm8
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm7, %xmm5
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm8, %xmm5
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm5, %xmm8
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm2, %xmm5
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm0, %xmm7
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm4
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm7
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm4
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm4
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm4, %ymm8
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm2, %xmm5
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm0, %xmm7
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm4
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm7
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm4
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm4
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm2, %xmm0
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm5
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm7
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm2, %xmm0
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
+; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti32x8 $1, %ymm8, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm8
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm3, %xmm2
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm1, %xmm5
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm2, %xmm5
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm4
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm7
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm2, %xmm5
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm3, %xmm4
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm1, %xmm6
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm7
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm6
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm5
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm6[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm7, %xmm0
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm5, %xmm0
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm5 = xmm6[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm5
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm3, %xmm0
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm1, %xmm5
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm0, %xmm5
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm4
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm6, %xmm7
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm0, %xmm5
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; AVX512DQ-NEXT: vucomiss %xmm4, %xmm5
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vucomiss %xmm3, %xmm1
+; AVX512DQ-NEXT: movl $0, %esi
+; AVX512DQ-NEXT: cmoval %ecx, %esi
+; AVX512DQ-NEXT: vmovd %esi, %xmm4
+; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm3[1,0]
+; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm1[1,0]
+; AVX512DQ-NEXT: vucomiss %xmm5, %xmm6
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmoval %ecx, %edx
+; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX512DQ-NEXT: vucomiss %xmm3, %xmm1
+; AVX512DQ-NEXT: cmoval %ecx, %eax
+; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
+; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vinserti32x8 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: test_cmp_v32f32:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vextractf32x4 $3, %zmm2, %xmm4
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm6
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm7
+; AVX512BW-NEXT: movl $-1, %ecx
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm5
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm8
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0]
+; AVX512BW-NEXT: vucomiss %xmm7, %xmm5
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm8, %xmm5
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm5, %xmm8
+; AVX512BW-NEXT: vextractf32x4 $2, %zmm2, %xmm5
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm7
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm4
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm7
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm4
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vinserti128 $1, %xmm8, %ymm4, %ymm8
+; AVX512BW-NEXT: vextractf32x4 $1, %zmm2, %xmm5
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $1, %zmm0, %xmm7
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm4
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm7
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm9
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm7[1,0]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm4
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm9, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm6 = xmm7[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm2, %xmm0
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm5
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm7
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm2, %xmm0
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm8
+; AVX512BW-NEXT: vextractf32x4 $3, %zmm3, %xmm2
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $3, %zmm1, %xmm5
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm2, %xmm5
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm7
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm2, %xmm5
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
+; AVX512BW-NEXT: vextractf32x4 $2, %zmm3, %xmm4
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm5 = xmm4[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $2, %zmm1, %xmm6
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm7 = xmm6[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm7
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm5
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm6[1,0]
+; AVX512BW-NEXT: vucomiss %xmm7, %xmm0
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm5, %xmm0
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm6[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm5
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512BW-NEXT: vextractf32x4 $1, %zmm3, %xmm0
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $1, %zmm1, %xmm5
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm0, %xmm5
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm7
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm0, %xmm5
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm5
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm3, %xmm1
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm3[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm1[1,0]
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm6
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm3, %xmm1
+; AVX512BW-NEXT: cmoval %ecx, %eax
+; AVX512BW-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
+; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
 %1 = fcmp ogt <32 x float> %a0, %a1
 ret <32 x i1> %1
 }
@@ -2785,136 +3523,398 @@ define <16 x i1> @test_cmp_v16i64(<16 x i64> %a0, <16 x i64> %a1) nounwind {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512-LABEL: test_cmp_v16i64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vextracti32x4 $3, %zmm2, %xmm4
-; AVX512-NEXT: vpextrq $1, %xmm4, %rcx
-; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm5
-; AVX512-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: cmpq %rcx, %rdx
-; AVX512-NEXT: movq $-1, %rcx
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm6
-; AVX512-NEXT: vmovq %xmm4, %rdx
-; AVX512-NEXT: vmovq %xmm5, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm4
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512-NEXT: vextracti32x4 $2, %zmm2, %xmm5
-; AVX512-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm6
-; AVX512-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm7
-; AVX512-NEXT: vmovq %xmm5, %rdx
-; AVX512-NEXT: vmovq %xmm6, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm5
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512-NEXT: vextracti32x4 $1, %zmm2, %xmm5
-; AVX512-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512-NEXT: vextracti32x4 $1, %zmm0, %xmm6
-; AVX512-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm7
-; AVX512-NEXT: vmovq %xmm5, %rdx
-; AVX512-NEXT: vmovq %xmm6, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm5
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm6
-; AVX512-NEXT: vmovq %xmm2, %rdx
-; AVX512-NEXT: vmovq %xmm0, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm0
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
-; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: vextracti32x4 $3, %zmm3, %xmm2
-; AVX512-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512-NEXT: vpextrq $1, %xmm4, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm5
-; AVX512-NEXT: vmovq %xmm2, %rdx
-; AVX512-NEXT: vmovq %xmm4, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm2
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
-; AVX512-NEXT: vextracti32x4 $2, %zmm3, %xmm4
-; AVX512-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; AVX512-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm6
-; AVX512-NEXT: vmovq %xmm4, %rdx
-; AVX512-NEXT: vmovq %xmm5, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm4
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512-NEXT: vextracti32x4 $1, %zmm3, %xmm4
-; AVX512-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512-NEXT: vextracti32x4 $1, %zmm1, %xmm5
-; AVX512-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm6
-; AVX512-NEXT: vmovq %xmm4, %rdx
-; AVX512-NEXT: vmovq %xmm5, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm4
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
-; AVX512-NEXT: vpextrq $1, %xmm1, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgq %rcx, %rdx
-; AVX512-NEXT: vmovq %rdx, %xmm5
-; AVX512-NEXT: vmovq %xmm3, %rdx
-; AVX512-NEXT: vmovq %xmm1, %rsi
-; AVX512-NEXT: cmpq %rdx, %rsi
-; AVX512-NEXT: cmovgq %rcx, %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: test_cmp_v16i64:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti32x4 $3, %zmm2, %xmm4
+; AVX512F-NEXT: vpextrq $1, %xmm4, %rcx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm5
+; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: cmpq %rcx, %rdx
+; AVX512F-NEXT: movq $-1, %rcx
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm6
+; AVX512F-NEXT: vmovq %xmm4, %rdx
+; AVX512F-NEXT: vmovq %xmm5, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm4
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512F-NEXT: vextracti32x4 $2, %zmm2, %xmm5
+; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm6
+; AVX512F-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm7
+; AVX512F-NEXT: vmovq %xmm5, %rdx
+; AVX512F-NEXT: vmovq %xmm6, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm5
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX512F-NEXT: vextracti32x4 $1, %zmm2, %xmm5
+; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; AVX512F-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm7
+; AVX512F-NEXT: vmovq %xmm5, %rdx
+; AVX512F-NEXT: vmovq %xmm6, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm5
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512F-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm6
+; AVX512F-NEXT: vmovq %xmm2, %rdx
+; AVX512F-NEXT: vmovq %xmm0, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm0
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
+; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512F-NEXT: vextracti32x4 $3, %zmm3, %xmm2
+; AVX512F-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm5
+; AVX512F-NEXT: vmovq %xmm2, %rdx
+; AVX512F-NEXT: vmovq %xmm4, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm2
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
+; AVX512F-NEXT: vextracti32x4 $2, %zmm3, %xmm4
+; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; AVX512F-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm6
+; AVX512F-NEXT: vmovq %xmm4, %rdx
+; AVX512F-NEXT: vmovq %xmm5, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm4
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; AVX512F-NEXT: vextracti32x4 $1, %zmm3, %xmm4
+; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm5
+; AVX512F-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm6
+; AVX512F-NEXT: vmovq %xmm4, %rdx
+; AVX512F-NEXT: vmovq %xmm5, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm4
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512F-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgq %rcx, %rdx
+; AVX512F-NEXT: vmovq %rdx, %xmm5
+; AVX512F-NEXT: vmovq %xmm3, %rdx
+; AVX512F-NEXT: vmovq %xmm1, %rsi
+; AVX512F-NEXT: cmpq %rdx, %rsi
+; AVX512F-NEXT: cmovgq %rcx, %rax
+; AVX512F-NEXT: vmovq %rax, %xmm1
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
+; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQ-LABEL: test_cmp_v16i64:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vextracti64x2 $3, %zmm2, %xmm4
+; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rcx
+; AVX512DQ-NEXT: vextracti64x2 $3, %zmm0, %xmm5
+; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512DQ-NEXT: xorl %eax, %eax
+; AVX512DQ-NEXT: cmpq %rcx, %rdx
+; AVX512DQ-NEXT: movq $-1, %rcx
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm6
+; AVX512DQ-NEXT: vmovq %xmm4, %rdx
+; AVX512DQ-NEXT: vmovq %xmm5, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm4
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512DQ-NEXT: vextracti64x2 $2, %zmm2, %xmm5
+; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512DQ-NEXT: vextracti64x2 $2, %zmm0, %xmm6
+; AVX512DQ-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm7
+; AVX512DQ-NEXT: vmovq %xmm5, %rdx
+; AVX512DQ-NEXT: vmovq %xmm6, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm5
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX512DQ-NEXT: vextracti64x2 $1, %zmm2, %xmm5
+; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm6
+; AVX512DQ-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm7
+; AVX512DQ-NEXT: vmovq %xmm5, %rdx
+; AVX512DQ-NEXT: vmovq %xmm6, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm5
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm6
+; AVX512DQ-NEXT: vmovq %xmm2, %rdx
+; AVX512DQ-NEXT: vmovq %xmm0, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
+; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512DQ-NEXT: vextracti64x2 $3, %zmm3, %xmm2
+; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512DQ-NEXT: vextracti64x2 $3, %zmm1, %xmm4
+; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm5
+; AVX512DQ-NEXT: vmovq %xmm2, %rdx
+; AVX512DQ-NEXT: vmovq %xmm4, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm2
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
+; AVX512DQ-NEXT: vextracti64x2 $2, %zmm3, %xmm4
+; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512DQ-NEXT: vextracti64x2 $2, %zmm1, %xmm5
+; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm6
+; AVX512DQ-NEXT: vmovq %xmm4, %rdx
+; AVX512DQ-NEXT: vmovq %xmm5, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm4
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; AVX512DQ-NEXT: vextracti64x2 $1, %zmm3, %xmm4
+; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512DQ-NEXT: vextracti64x2 $1, %zmm1, %xmm5
+; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm6
+; AVX512DQ-NEXT: vmovq %xmm4, %rdx
+; AVX512DQ-NEXT: vmovq %xmm5, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm4
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512DQ-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: movl $0, %edx
+; AVX512DQ-NEXT: cmovgq %rcx, %rdx
+; AVX512DQ-NEXT: vmovq %rdx, %xmm5
+; AVX512DQ-NEXT: vmovq %xmm3, %rdx
+; AVX512DQ-NEXT: vmovq %xmm1, %rsi
+; AVX512DQ-NEXT: cmpq %rdx, %rsi
+; AVX512DQ-NEXT: cmovgq %rcx, %rax
+; AVX512DQ-NEXT: vmovq %rax, %xmm1
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
+; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: test_cmp_v16i64:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm4
+; AVX512BW-NEXT: vpextrq $1, %xmm4, %rcx
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm5
+; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: cmpq %rcx, %rdx
+; AVX512BW-NEXT: movq $-1, %rcx
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm6
+; AVX512BW-NEXT: vmovq %xmm4, %rdx
+; AVX512BW-NEXT: vmovq %xmm5, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm4
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm5
+; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm6
+; AVX512BW-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm7
+; AVX512BW-NEXT: vmovq %xmm5, %rdx
+; AVX512BW-NEXT: vmovq %xmm6, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm5
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX512BW-NEXT: vextracti32x4 $1, %zmm2, %xmm5
+; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; AVX512BW-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm7
+; AVX512BW-NEXT: vmovq %xmm5, %rdx
+; AVX512BW-NEXT: vmovq %xmm6, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm5
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm6
+; AVX512BW-NEXT: vmovq %xmm2, %rdx
+; AVX512BW-NEXT: vmovq %xmm0, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm0
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
+; AVX512BW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2
+; AVX512BW-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm5
+; AVX512BW-NEXT: vmovq %xmm2, %rdx
+; AVX512BW-NEXT: vmovq %xmm4, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm2
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm4
+; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; AVX512BW-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm6
+; AVX512BW-NEXT: vmovq %xmm4, %rdx
+; AVX512BW-NEXT: vmovq %xmm5, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm4
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; AVX512BW-NEXT: vextracti32x4 $1, %zmm3, %xmm4
+; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm5
+; AVX512BW-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm6
+; AVX512BW-NEXT: vmovq %xmm4, %rdx
+; AVX512BW-NEXT: vmovq %xmm5, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm4
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512BW-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512BW-NEXT: vpextrq $1, %xmm1, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: movl $0, %edx
+; AVX512BW-NEXT: cmovgq %rcx, %rdx
+; AVX512BW-NEXT: vmovq %rdx, %xmm5
+; AVX512BW-NEXT: vmovq %xmm3, %rdx
+; AVX512BW-NEXT: vmovq %xmm1, %rsi
+; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmovgq %rcx, %rax
+; AVX512BW-NEXT: vmovq %rax, %xmm1
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
+; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: retq
 %1 = icmp sgt <16 x i64> %a0, %a1
 ret <16 x i1> %1
 }
@@ -3252,223 +4252,660 @@ define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind {
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT: retq
 ;
-; AVX512-LABEL: test_cmp_v32i32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vextracti32x4 $3, %zmm2, %xmm4
-; AVX512-NEXT: vpextrd $1, %xmm4, %ecx
-; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm5
-; AVX512-NEXT: vpextrd $1, %xmm5, %edx
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: cmpl %ecx, %edx
-; AVX512-NEXT: movl $-1, %ecx
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm4, %esi
-; AVX512-NEXT: vmovd %xmm5, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm6
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $2, %xmm4, %edx
-; AVX512-NEXT: vpextrd $2, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $3, %xmm4, %edx
-; AVX512-NEXT: vpextrd $3, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4
-; AVX512-NEXT: vextracti32x4 $2, %zmm2, %xmm5
-; AVX512-NEXT: vpextrd $1, %xmm5, %edx
-; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm6
-; AVX512-NEXT: vpextrd $1, %xmm6, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm5, %esi
-; AVX512-NEXT: vmovd %xmm6, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm7
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7
-; AVX512-NEXT: vpextrd $2, %xmm5, %edx
-; AVX512-NEXT: vpextrd $2, %xmm6, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7
-; AVX512-NEXT: vpextrd $3, %xmm5, %edx
-; AVX512-NEXT: vpextrd $3, %xmm6, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512-NEXT: vextracti32x4 $1, %zmm2, %xmm5
-; AVX512-NEXT: vpextrd $1, %xmm5, %edx
-; AVX512-NEXT: vextracti32x4 $1, %zmm0, %xmm6
-; AVX512-NEXT: vpextrd $1, %xmm6, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm5, %esi
-; AVX512-NEXT: vmovd %xmm6, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm7
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7
-; AVX512-NEXT: vpextrd $2, %xmm5, %edx
-; AVX512-NEXT: vpextrd $2, %xmm6, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5
-; AVX512-NEXT: vpextrd $1, %xmm2, %edx
-; AVX512-NEXT: vpextrd $1, %xmm0, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm2, %esi
-; AVX512-NEXT: vmovd %xmm0, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm6
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $2, %xmm2, %edx
-; AVX512-NEXT: vpextrd $2, %xmm0, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $3, %xmm2, %edx
-; AVX512-NEXT: vpextrd $3, %xmm0, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm6, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vextracti32x4 $3, %zmm3, %xmm2
-; AVX512-NEXT: vpextrd $1, %xmm2, %edx
-; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512-NEXT: vpextrd $1, %xmm4, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm2, %esi
-; AVX512-NEXT: vmovd %xmm4, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm5
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpextrd $2, %xmm2, %edx
-; AVX512-NEXT: vpextrd $2, %xmm4, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpextrd $3, %xmm2, %edx
-; AVX512-NEXT: vpextrd $3, %xmm4, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm5, %xmm2
-; AVX512-NEXT: vextracti32x4 $2, %zmm3, %xmm4
-; AVX512-NEXT: vpextrd $1, %xmm4, %edx
-; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; AVX512-NEXT: vpextrd $1, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm4, %esi
-; AVX512-NEXT: vmovd %xmm5, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm6
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $2, %xmm4, %edx
-; AVX512-NEXT: vpextrd $2, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $3, %xmm4, %edx
-; AVX512-NEXT: vpextrd $3, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4
-; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512-NEXT: vextracti32x4 $1, %zmm3, %xmm4
-; AVX512-NEXT: vpextrd $1, %xmm4, %edx
-; AVX512-NEXT: vextracti32x4 $1, %zmm1, %xmm5
-; AVX512-NEXT: vpextrd $1, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm4, %esi
-; AVX512-NEXT: vmovd %xmm5, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm6
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $2, %xmm4, %edx
-; AVX512-NEXT: vpextrd $2, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
-; AVX512-NEXT: vpextrd $3, %xmm4, %edx
-; AVX512-NEXT: vpextrd $3, %xmm5, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4
-; AVX512-NEXT: vpextrd $1, %xmm3, %edx
-; AVX512-NEXT: vpextrd $1, %xmm1, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vmovd %xmm3, %esi
-; AVX512-NEXT: vmovd %xmm1, %edi
-; AVX512-NEXT: cmpl %esi, %edi
-; AVX512-NEXT: movl $0, %esi
-; AVX512-NEXT: cmovgl %ecx, %esi
-; AVX512-NEXT: vmovd %esi, %xmm5
-; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpextrd $2, %xmm3, %edx
-; AVX512-NEXT: vpextrd $2, %xmm1, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: movl $0, %edx
-; AVX512-NEXT: cmovgl %ecx, %edx
-; AVX512-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
-; AVX512-NEXT: vpextrd $3, %xmm3, %edx
-; AVX512-NEXT: vpextrd $3, %xmm1, %esi
-; AVX512-NEXT: cmpl %edx, %esi
-; AVX512-NEXT: cmovgl %ecx, %eax
-; AVX512-NEXT: vpinsrd $3, %eax, %xmm5, %xmm1
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: test_cmp_v32i32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti32x4 $3, %zmm2, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm4, %ecx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm5
+; AVX512F-NEXT: vpextrd $1, %xmm5, %edx
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: cmpl %ecx, %edx
+; AVX512F-NEXT: movl $-1, %ecx
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vmovd %xmm4, %esi
+; AVX512F-NEXT: vmovd %xmm5, %edi
+; AVX512F-NEXT: cmpl %esi, %edi
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmovgl %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm6
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
+; AVX512F-NEXT: vpextrd $2, %xmm4, %edx
+; AVX512F-NEXT: vpextrd $2, %xmm5, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
+; AVX512F-NEXT: vpextrd $3, %xmm4, %edx
+; AVX512F-NEXT: vpextrd $3, %xmm5, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4
+; AVX512F-NEXT: vextracti32x4 $2, %zmm2, %xmm5
+; AVX512F-NEXT: vpextrd $1, %xmm5, %edx
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm6
+; AVX512F-NEXT: vpextrd $1, %xmm6, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vmovd %xmm5, %esi
+; AVX512F-NEXT: vmovd %xmm6, %edi
+; AVX512F-NEXT: cmpl %esi, %edi
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmovgl %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm7
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7
+; AVX512F-NEXT: vpextrd $2, %xmm5, %edx
+; AVX512F-NEXT: vpextrd $2, %xmm6, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7
+; AVX512F-NEXT: vpextrd $3, %xmm5, %edx
+; AVX512F-NEXT: vpextrd $3, %xmm6, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5
+; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX512F-NEXT: vextracti32x4 $1, %zmm2, %xmm5
+; AVX512F-NEXT: vpextrd $1, %xmm5, %edx
+; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; AVX512F-NEXT: vpextrd $1, %xmm6, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vmovd %xmm5, %esi
+; AVX512F-NEXT: vmovd %xmm6, %edi
+; AVX512F-NEXT: cmpl %esi, %edi
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmovgl %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm7
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7
+; AVX512F-NEXT: vpextrd $2, %xmm5, %edx
+; AVX512F-NEXT: vpextrd $2, %xmm6, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7
+; AVX512F-NEXT: vpextrd $3, %xmm5, %edx
+; AVX512F-NEXT: vpextrd $3, %xmm6, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5
+; AVX512F-NEXT: vpextrd $1, %xmm2, %edx
+; AVX512F-NEXT: vpextrd $1, %xmm0, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vmovd %xmm2, %esi
+; AVX512F-NEXT: vmovd %xmm0, %edi
+; AVX512F-NEXT: cmpl %esi, %edi
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmovgl %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm6
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
+; AVX512F-NEXT: vpextrd $2, %xmm2, %edx
+; AVX512F-NEXT: vpextrd $2, %xmm0, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
+; AVX512F-NEXT: vpextrd $3, %xmm2, %edx
+; AVX512F-NEXT: vpextrd $3, %xmm0, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm6, %xmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vextracti32x4 $3, %zmm3, %xmm2
+; AVX512F-NEXT: vpextrd $1, %xmm2, %edx
+; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm4, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vmovd %xmm2, %esi
+; AVX512F-NEXT: vmovd %xmm4, %edi
+; AVX512F-NEXT: cmpl %esi, %edi
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmovgl %ecx, %esi
+; AVX512F-NEXT: vmovd %esi, %xmm5
+; AVX512F-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512F-NEXT: vpextrd $2, %xmm2, %edx
+; AVX512F-NEXT: vpextrd $2, %xmm4, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
+; AVX512F-NEXT: vpextrd $3, %xmm2, %edx
+; AVX512F-NEXT: vpextrd $3, %xmm4, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vpinsrd $3, %edx, %xmm5, %xmm2
+; AVX512F-NEXT: vextracti32x4 $2, %zmm3, %xmm4
+; AVX512F-NEXT: vpextrd $1, %xmm4, %edx
+; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; AVX512F-NEXT: vpextrd $1, %xmm5, %esi
+; AVX512F-NEXT: cmpl %edx, %esi
+; AVX512F-NEXT: movl $0, %edx
+; AVX512F-NEXT: cmovgl %ecx, %edx
+; AVX512F-NEXT: vmovd %xmm4, %esi
+; AVX512F-NEXT: vmovd %xmm5, %edi
+; AVX512F-NEXT: cmpl %esi, %edi
+; AVX512F-NEXT: movl $0, %esi
+; AVX512F-NEXT: cmovgl %ecx, %esi +; AVX512F-NEXT: vmovd %esi, %xmm6 +; AVX512F-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512F-NEXT: vpextrd $2, %xmm4, %edx +; AVX512F-NEXT: vpextrd $2, %xmm5, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512F-NEXT: vpextrd $3, %xmm4, %edx +; AVX512F-NEXT: vpextrd $3, %xmm5, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512F-NEXT: vextracti32x4 $1, %zmm3, %xmm4 +; AVX512F-NEXT: vpextrd $1, %xmm4, %edx +; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm5 +; AVX512F-NEXT: vpextrd $1, %xmm5, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vmovd %xmm4, %esi +; AVX512F-NEXT: vmovd %xmm5, %edi +; AVX512F-NEXT: cmpl %esi, %edi +; AVX512F-NEXT: movl $0, %esi +; AVX512F-NEXT: cmovgl %ecx, %esi +; AVX512F-NEXT: vmovd %esi, %xmm6 +; AVX512F-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512F-NEXT: vpextrd $2, %xmm4, %edx +; AVX512F-NEXT: vpextrd $2, %xmm5, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512F-NEXT: vpextrd $3, %xmm4, %edx +; AVX512F-NEXT: vpextrd $3, %xmm5, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512F-NEXT: vpextrd $1, %xmm3, %edx +; AVX512F-NEXT: vpextrd $1, %xmm1, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vmovd %xmm3, %esi +; AVX512F-NEXT: vmovd %xmm1, %edi +; AVX512F-NEXT: cmpl %esi, %edi +; AVX512F-NEXT: movl $0, %esi +; AVX512F-NEXT: cmovgl %ecx, %esi +; AVX512F-NEXT: vmovd %esi, %xmm5 +; AVX512F-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5 +; AVX512F-NEXT: vpextrd $2, %xmm3, %edx +; AVX512F-NEXT: vpextrd $2, %xmm1, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgl %ecx, %edx +; AVX512F-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5 +; AVX512F-NEXT: vpextrd $3, %xmm3, %edx +; AVX512F-NEXT: vpextrd $3, %xmm1, %esi +; AVX512F-NEXT: cmpl %edx, %esi +; AVX512F-NEXT: cmovgl %ecx, %eax +; AVX512F-NEXT: vpinsrd $3, %eax, %xmm5, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v32i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextracti32x4 $3, %zmm2, %xmm4 +; AVX512DQ-NEXT: vpextrd $1, %xmm4, %ecx +; AVX512DQ-NEXT: vextracti32x4 $3, %zmm0, %xmm5 +; AVX512DQ-NEXT: vpextrd $1, %xmm5, %edx +; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: cmpl %ecx, %edx +; AVX512DQ-NEXT: movl $-1, %ecx +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm4, %esi +; AVX512DQ-NEXT: vmovd %xmm5, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm6 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $2, %xmm4, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; 
AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $3, %xmm4, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512DQ-NEXT: vextracti32x4 $2, %zmm2, %xmm5 +; AVX512DQ-NEXT: vpextrd $1, %xmm5, %edx +; AVX512DQ-NEXT: vextracti32x4 $2, %zmm0, %xmm6 +; AVX512DQ-NEXT: vpextrd $1, %xmm6, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm5, %esi +; AVX512DQ-NEXT: vmovd %xmm6, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm7 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7 +; AVX512DQ-NEXT: vpextrd $2, %xmm5, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm6, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7 +; AVX512DQ-NEXT: vpextrd $3, %xmm5, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm6, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5 +; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512DQ-NEXT: vextracti32x4 $1, %zmm2, %xmm5 +; AVX512DQ-NEXT: vpextrd $1, %xmm5, %edx +; AVX512DQ-NEXT: vextracti32x4 $1, %zmm0, %xmm6 +; AVX512DQ-NEXT: vpextrd $1, %xmm6, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm5, %esi +; AVX512DQ-NEXT: vmovd %xmm6, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm7 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7 +; AVX512DQ-NEXT: vpextrd $2, %xmm5, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm6, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7 +; AVX512DQ-NEXT: vpextrd $3, %xmm5, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm6, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5 +; AVX512DQ-NEXT: vpextrd $1, %xmm2, %edx +; AVX512DQ-NEXT: vpextrd $1, %xmm0, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm2, %esi +; AVX512DQ-NEXT: vmovd %xmm0, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm6 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $2, %xmm2, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm0, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $3, %xmm2, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm0, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm6, %xmm0 +; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm4, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vextracti32x4 $3, %zmm3, %xmm2 +; AVX512DQ-NEXT: vpextrd $1, %xmm2, %edx +; AVX512DQ-NEXT: vextracti32x4 $3, 
%zmm1, %xmm4 +; AVX512DQ-NEXT: vpextrd $1, %xmm4, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm2, %esi +; AVX512DQ-NEXT: vmovd %xmm4, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm5 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpextrd $2, %xmm2, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm4, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpextrd $3, %xmm2, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm4, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm5, %xmm2 +; AVX512DQ-NEXT: vextracti32x4 $2, %zmm3, %xmm4 +; AVX512DQ-NEXT: vpextrd $1, %xmm4, %edx +; AVX512DQ-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; AVX512DQ-NEXT: vpextrd $1, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm4, %esi +; AVX512DQ-NEXT: vmovd %xmm5, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm6 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $2, %xmm4, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $3, %xmm4, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512DQ-NEXT: vextracti32x4 $1, %zmm3, %xmm4 +; AVX512DQ-NEXT: vpextrd $1, %xmm4, %edx +; AVX512DQ-NEXT: vextracti32x4 $1, %zmm1, %xmm5 +; AVX512DQ-NEXT: vpextrd $1, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm4, %esi +; AVX512DQ-NEXT: vmovd %xmm5, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm6 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $2, %xmm4, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512DQ-NEXT: vpextrd $3, %xmm4, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm5, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512DQ-NEXT: vpextrd $1, %xmm3, %edx +; AVX512DQ-NEXT: vpextrd $1, %xmm1, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vmovd %xmm3, %esi +; AVX512DQ-NEXT: vmovd %xmm1, %edi +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: movl $0, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %esi +; AVX512DQ-NEXT: vmovd %esi, %xmm5 +; AVX512DQ-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpextrd $2, %xmm3, %edx +; AVX512DQ-NEXT: vpextrd $2, %xmm1, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: movl 
$0, %edx +; AVX512DQ-NEXT: cmovgl %ecx, %edx +; AVX512DQ-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpextrd $3, %xmm3, %edx +; AVX512DQ-NEXT: vpextrd $3, %xmm1, %esi +; AVX512DQ-NEXT: cmpl %edx, %esi +; AVX512DQ-NEXT: cmovgl %ecx, %eax +; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm5, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm2, %zmm1, %zmm1 +; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v32i32: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm4 +; AVX512BW-NEXT: vpextrd $1, %xmm4, %ecx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm5 +; AVX512BW-NEXT: vpextrd $1, %xmm5, %edx +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: cmpl %ecx, %edx +; AVX512BW-NEXT: movl $-1, %ecx +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm4, %esi +; AVX512BW-NEXT: vmovd %xmm5, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $2, %xmm4, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $3, %xmm4, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm5 +; AVX512BW-NEXT: vpextrd $1, %xmm5, %edx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm6 +; AVX512BW-NEXT: vpextrd $1, %xmm6, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm5, %esi +; AVX512BW-NEXT: vmovd %xmm6, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm7 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrd $2, %xmm5, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm6, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrd $3, %xmm5, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm6, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5 +; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm2, %xmm5 +; AVX512BW-NEXT: vpextrd $1, %xmm5, %edx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm6 +; AVX512BW-NEXT: vpextrd $1, %xmm6, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm5, %esi +; AVX512BW-NEXT: vmovd %xmm6, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm7 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrd $2, %xmm5, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm6, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrd $3, %xmm5, %edx +; 
AVX512BW-NEXT: vpextrd $3, %xmm6, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm7, %xmm5 +; AVX512BW-NEXT: vpextrd $1, %xmm2, %edx +; AVX512BW-NEXT: vpextrd $1, %xmm0, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm2, %esi +; AVX512BW-NEXT: vmovd %xmm0, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $2, %xmm2, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm0, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $3, %xmm2, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm0, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm6, %xmm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2 +; AVX512BW-NEXT: vpextrd $1, %xmm2, %edx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; AVX512BW-NEXT: vpextrd $1, %xmm4, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm2, %esi +; AVX512BW-NEXT: vmovd %xmm4, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm5 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrd $2, %xmm2, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm4, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrd $3, %xmm2, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm4, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm5, %xmm2 +; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm4 +; AVX512BW-NEXT: vpextrd $1, %xmm4, %edx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; AVX512BW-NEXT: vpextrd $1, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm4, %esi +; AVX512BW-NEXT: vmovd %xmm5, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $2, %xmm4, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $3, %xmm4, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm3, %xmm4 +; AVX512BW-NEXT: vpextrd $1, %xmm4, %edx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm5 +; AVX512BW-NEXT: vpextrd $1, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; 
AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm4, %esi +; AVX512BW-NEXT: vmovd %xmm5, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $2, %xmm4, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrd $3, %xmm4, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm5, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4 +; AVX512BW-NEXT: vpextrd $1, %xmm3, %edx +; AVX512BW-NEXT: vpextrd $1, %xmm1, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vmovd %xmm3, %esi +; AVX512BW-NEXT: vmovd %xmm1, %edi +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgl %ecx, %esi +; AVX512BW-NEXT: vmovd %esi, %xmm5 +; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrd $2, %xmm3, %edx +; AVX512BW-NEXT: vpextrd $2, %xmm1, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgl %ecx, %edx +; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrd $3, %xmm3, %edx +; AVX512BW-NEXT: vpextrd $3, %xmm1, %esi +; AVX512BW-NEXT: cmpl %edx, %esi +; AVX512BW-NEXT: cmovgl %ecx, %eax +; AVX512BW-NEXT: vpinsrd $3, %eax, %xmm5, %xmm1 +; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <32 x i32> %a0, %a1 ret <32 x i1> %1 } @@ -4342,291 +5779,987 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v64i16: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 -; AVX512-NEXT: vpmovsxwd %ymm3, %zmm3 -; AVX512-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512-NEXT: kshiftlw $14, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: kshiftlw $15, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm3 -; AVX512-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $13, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $12, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $11, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $10, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $9, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $8, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, 
%eax -; AVX512-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $7, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $6, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $5, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $4, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $3, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $2, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $1, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftrw $15, %k0, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2 -; AVX512-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512-NEXT: kshiftlw $14, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: kshiftlw $15, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm2 -; AVX512-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $13, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $12, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $11, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $10, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $9, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $8, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $7, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $6, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $5, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $4, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $3, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $2, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: 
kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $1, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftrw $15, %k0, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512-NEXT: vpsllw $7, %ymm2, %ymm2 -; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512-NEXT: vpxor %ymm6, %ymm6, %ymm6 -; AVX512-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2 -; AVX512-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 -; AVX512-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512-NEXT: kshiftlw $14, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: kshiftlw $15, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm1 -; AVX512-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $13, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $12, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $11, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $10, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $9, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $8, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $7, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $6, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $5, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $4, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $3, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $2, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $1, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftrw $15, %k0, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX512-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kshiftlw $14, 
%k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: kshiftlw $15, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 -; AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $13, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $12, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $11, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $10, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $9, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $8, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $7, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $6, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $5, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $4, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $3, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $2, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $1, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovw %k1, %eax -; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftrw $15, %k0, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpcmpgtb %ymm0, %ymm6, %ymm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512-NEXT: # kill: %XMM2 %XMM2 %YMM2 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v64i16: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 +; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3 +; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm3 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; 
AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2 +; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 +; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm2 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw 
%k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpxor %ymm6, %ymm6, %ymm6 +; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2 +; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm1 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; 
AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm6, 
%ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512F-NEXT: # kill: %XMM2 %XMM2 %YMM2 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v64i16: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3 +; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 +; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 +; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %ecx +; AVX512DQ-NEXT: vmovd %ecx, %xmm3 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2 +; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 +; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 +; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %ecx +; AVX512DQ-NEXT: vmovd %ecx, %xmm2 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $13, 
%k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2 +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpxor %ymm6, %ymm6, %ymm6 +; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2 +; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %ecx +; AVX512DQ-NEXT: vmovd %ecx, %xmm1 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; 
AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512DQ-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %ecx +; AVX512DQ-NEXT: vmovd %ecx, %xmm0 +; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1 +; AVX512DQ-NEXT: 
kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kmovw %k1, %eax +; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm6, %ymm0 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: # kill: %XMM2 %XMM2 %YMM2 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v64i16: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm4 +; AVX512BW-NEXT: vpextrw $1, %xmm4, %ecx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm5 +; AVX512BW-NEXT: vpextrw $1, %xmm5, %edx +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: cmpw %cx, %dx +; AVX512BW-NEXT: movw $-1, %cx +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm4, %esi +; AVX512BW-NEXT: vmovd %xmm5, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $2, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $3, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $4, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $5, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $6, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $7, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm6, %xmm4 +; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm5 +; 
AVX512BW-NEXT: vpextrw $1, %xmm5, %edx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm6 +; AVX512BW-NEXT: vpextrw $1, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm5, %esi +; AVX512BW-NEXT: vmovd %xmm6, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm7 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $2, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $3, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $4, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $5, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $6, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $7, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm7, %xmm5 +; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm2, %xmm5 +; AVX512BW-NEXT: vpextrw $1, %xmm5, %edx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm6 +; AVX512BW-NEXT: vpextrw $1, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm5, %esi +; AVX512BW-NEXT: vmovd %xmm6, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm7 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $2, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $3, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $4, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $5, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $6, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw 
%cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm7, %xmm7 +; AVX512BW-NEXT: vpextrw $7, %xmm5, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm6, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm7, %xmm5 +; AVX512BW-NEXT: vpextrw $1, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $1, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm2, %esi +; AVX512BW-NEXT: vmovd %xmm0, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $2, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $3, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $4, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $5, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $6, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $7, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm0, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm6, %xmm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2 +; AVX512BW-NEXT: vpextrw $1, %xmm2, %edx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; AVX512BW-NEXT: vpextrw $1, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm2, %esi +; AVX512BW-NEXT: vmovd %xmm4, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm5 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $2, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $3, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $4, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $5, %xmm2, %edx +; AVX512BW-NEXT: 
vpextrw $5, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $6, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $7, %xmm2, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm4, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm5, %xmm2 +; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm4 +; AVX512BW-NEXT: vpextrw $1, %xmm4, %edx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; AVX512BW-NEXT: vpextrw $1, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm4, %esi +; AVX512BW-NEXT: vmovd %xmm5, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $2, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $3, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $4, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $5, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $6, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $7, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm6, %xmm4 +; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm3, %xmm4 +; AVX512BW-NEXT: vpextrw $1, %xmm4, %edx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm5 +; AVX512BW-NEXT: vpextrw $1, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm4, %esi +; AVX512BW-NEXT: vmovd %xmm5, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm6 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $2, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $3, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, 
%dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $4, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $5, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $6, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm6, %xmm6 +; AVX512BW-NEXT: vpextrw $7, %xmm4, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm5, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $7, %edx, %xmm6, %xmm4 +; AVX512BW-NEXT: vpextrw $1, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $1, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vmovd %xmm3, %esi +; AVX512BW-NEXT: vmovd %xmm1, %edi +; AVX512BW-NEXT: cmpw %si, %di +; AVX512BW-NEXT: movl $0, %esi +; AVX512BW-NEXT: cmovgw %cx, %si +; AVX512BW-NEXT: vmovd %esi, %xmm5 +; AVX512BW-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $2, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $2, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $3, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $3, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $3, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $4, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $4, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $4, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $5, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $5, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $5, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $6, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $6, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgw %cx, %dx +; AVX512BW-NEXT: vpinsrw $6, %edx, %xmm5, %xmm5 +; AVX512BW-NEXT: vpextrw $7, %xmm3, %edx +; AVX512BW-NEXT: vpextrw $7, %xmm1, %esi +; AVX512BW-NEXT: cmpw %dx, %si +; AVX512BW-NEXT: cmovgw %cx, %ax +; AVX512BW-NEXT: vpinsrw $7, %eax, %xmm5, %xmm1 +; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <64 x i16> %a0, %a1 ret <64 x i1> %1 } @@ -6240,50 +8373,103 @@ define <128 x i1> @test_cmp_v128i8(<128 x i8> %a0, <128 x i8> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v128i8: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 -; AVX512-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 -; AVX512-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 -; AVX512-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX512-NEXT: vpmovsxbd %xmm4, %zmm4 -; AVX512-NEXT: vpslld $31, %zmm4, %zmm4 
-; AVX512-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512-NEXT: kmovw %k0, 14(%rdi) -; AVX512-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512-NEXT: kmovw %k0, 12(%rdi) -; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512-NEXT: kmovw %k0, 10(%rdi) -; AVX512-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512-NEXT: kmovw %k0, 8(%rdi) -; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512-NEXT: kmovw %k0, 6(%rdi) -; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512-NEXT: kmovw %k0, 4(%rdi) -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512-NEXT: kmovw %k0, 2(%rdi) -; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kmovw %k0, (%rdi) -; AVX512-NEXT: movq %rdi, %rax -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v128i8: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 +; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 +; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4 +; AVX512F-NEXT: vpslld $31, %zmm4, %zmm4 +; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 +; AVX512F-NEXT: kmovw %k0, 14(%rdi) +; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 +; AVX512F-NEXT: kmovw %k0, 12(%rdi) +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 +; AVX512F-NEXT: kmovw %k0, 10(%rdi) +; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 +; AVX512F-NEXT: kmovw %k0, 8(%rdi) +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 +; AVX512F-NEXT: kmovw %k0, 6(%rdi) +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512F-NEXT: kmovw %k0, 4(%rdi) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512F-NEXT: kmovw %k0, 2(%rdi) +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, (%rdi) +; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v128i8: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 +; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4 +; AVX512DQ-NEXT: vpslld $31, %zmm4, %zmm4 +; AVX512DQ-NEXT: 
vptestmd %zmm4, %zmm4, %k0 +; AVX512DQ-NEXT: kmovw %k0, 14(%rdi) +; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 +; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 +; AVX512DQ-NEXT: kmovw %k0, 12(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 +; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 +; AVX512DQ-NEXT: kmovw %k0, 10(%rdi) +; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 +; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 +; AVX512DQ-NEXT: kmovw %k0, 8(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 +; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 +; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, (%rdi) +; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v128i8: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm1, %k0 +; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm0, %k1 +; AVX512BW-NEXT: vpmovm2b %k1, %zmm0 +; AVX512BW-NEXT: vpmovm2b %k0, %zmm1 +; AVX512BW-NEXT: retq %1 = icmp sgt <128 x i8> %a0, %a1 ret <128 x i1> %1 } @@ -6781,231 +8967,684 @@ define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind ; AVX2-NEXT: popq %rbp ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v32f64: -; AVX512: # BB#0: -; AVX512-NEXT: vextractf32x4 $3, %zmm4, %xmm8 -; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm9 -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vucomisd %xmm8, %xmm9 -; AVX512-NEXT: movq $-1, %rcx -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm8 = xmm8[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] -; AVX512-NEXT: vucomisd %xmm8, %xmm9 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm8 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm10[0],xmm8[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm4, %xmm9 -; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm10 -; AVX512-NEXT: vucomisd %xmm9, %xmm10 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm11 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] -; AVX512-NEXT: vucomisd %xmm9, %xmm10 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm9 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] -; AVX512-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 -; AVX512-NEXT: vextractf32x4 $1, %zmm4, %xmm9 -; AVX512-NEXT: vextractf32x4 $1, %zmm0, %xmm10 -; AVX512-NEXT: vucomisd %xmm9, %xmm10 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm11 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] -; AVX512-NEXT: vucomisd %xmm9, %xmm10 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq 
%rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm9 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm8 -; AVX512-NEXT: vextractf32x4 $3, %zmm5, %xmm4 -; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm0 -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm9 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm0[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm5, %xmm4 -; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm0 -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 -; AVX512-NEXT: vextractf32x4 $1, %zmm5, %xmm4 -; AVX512-NEXT: vextractf32x4 $1, %zmm1, %xmm0 -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm0 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] -; AVX512-NEXT: vucomisd %xmm5, %xmm1 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512-NEXT: vucomisd %xmm5, %xmm1 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm8 -; AVX512-NEXT: vextractf32x4 $3, %zmm6, %xmm1 -; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm4 -; AVX512-NEXT: vucomisd %xmm1, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vucomisd %xmm1, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] -; AVX512-NEXT: 
vextractf32x4 $2, %zmm6, %xmm4 -; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm5 -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vucomisd %xmm4, %xmm5 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: vextractf32x4 $1, %zmm6, %xmm1 -; AVX512-NEXT: vextractf32x4 $1, %zmm2, %xmm4 -; AVX512-NEXT: vucomisd %xmm1, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vucomisd %xmm1, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] -; AVX512-NEXT: vucomisd %xmm6, %xmm2 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512-NEXT: vucomisd %xmm5, %xmm2 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm2 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vextractf32x4 $3, %zmm7, %xmm1 -; AVX512-NEXT: vextractf32x4 $3, %zmm3, %xmm2 -; AVX512-NEXT: vucomisd %xmm1, %xmm2 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512-NEXT: vucomisd %xmm1, %xmm2 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm7, %xmm2 -; AVX512-NEXT: vextractf32x4 $2, %zmm3, %xmm4 -; AVX512-NEXT: vucomisd %xmm2, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vucomisd %xmm2, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm2 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512-NEXT: vextractf32x4 $1, %zmm7, %xmm2 -; AVX512-NEXT: vextractf32x4 $1, %zmm3, %xmm4 -; AVX512-NEXT: vucomisd %xmm2, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vucomisd %xmm2, %xmm4 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm2 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] -; AVX512-NEXT: vucomisd %xmm7, %xmm3 -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovaq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm7[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = 
xmm3[1,0] -; AVX512-NEXT: vucomisd %xmm5, %xmm3 -; AVX512-NEXT: cmovaq %rcx, %rax -; AVX512-NEXT: vmovq %rax, %xmm3 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v32f64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextractf32x4 $3, %zmm4, %xmm8 +; AVX512F-NEXT: vextractf32x4 $3, %zmm0, %xmm9 +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: vucomisd %xmm8, %xmm9 +; AVX512F-NEXT: movq $-1, %rcx +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm8 = xmm8[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512F-NEXT: vucomisd %xmm8, %xmm9 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm8 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm10[0],xmm8[0] +; AVX512F-NEXT: vextractf32x4 $2, %zmm4, %xmm9 +; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm10 +; AVX512F-NEXT: vucomisd %xmm9, %xmm10 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm11 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] +; AVX512F-NEXT: vucomisd %xmm9, %xmm10 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm9 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] +; AVX512F-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 +; AVX512F-NEXT: vextractf32x4 $1, %zmm4, %xmm9 +; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm10 +; AVX512F-NEXT: vucomisd %xmm9, %xmm10 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm11 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] +; AVX512F-NEXT: vucomisd %xmm9, %xmm10 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm9 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512F-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm8 +; AVX512F-NEXT: vextractf32x4 $3, %zmm5, %xmm4 +; AVX512F-NEXT: vextractf32x4 $3, %zmm1, %xmm0 +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm9 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm0[0] +; AVX512F-NEXT: vextractf32x4 $2, %zmm5, %xmm4 +; AVX512F-NEXT: vextractf32x4 $2, %zmm1, %xmm0 +; AVX512F-NEXT: 
vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512F-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 +; AVX512F-NEXT: vextractf32x4 $1, %zmm5, %xmm4 +; AVX512F-NEXT: vextractf32x4 $1, %zmm1, %xmm0 +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm0 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512F-NEXT: vucomisd %xmm5, %xmm1 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512F-NEXT: vucomisd %xmm5, %xmm1 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm8 +; AVX512F-NEXT: vextractf32x4 $3, %zmm6, %xmm1 +; AVX512F-NEXT: vextractf32x4 $3, %zmm2, %xmm4 +; AVX512F-NEXT: vucomisd %xmm1, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vucomisd %xmm1, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512F-NEXT: vextractf32x4 $2, %zmm6, %xmm4 +; AVX512F-NEXT: vextractf32x4 $2, %zmm2, %xmm5 +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512F-NEXT: vucomisd %xmm4, %xmm5 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vextractf32x4 $1, %zmm6, %xmm1 +; AVX512F-NEXT: vextractf32x4 $1, %zmm2, %xmm4 +; AVX512F-NEXT: vucomisd %xmm1, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vucomisd %xmm1, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512F-NEXT: vucomisd %xmm6, %xmm2 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: 
vpermilpd {{.*#+}} xmm5 = xmm6[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512F-NEXT: vucomisd %xmm5, %xmm2 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm2 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vextractf32x4 $3, %zmm7, %xmm1 +; AVX512F-NEXT: vextractf32x4 $3, %zmm3, %xmm2 +; AVX512F-NEXT: vucomisd %xmm1, %xmm2 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512F-NEXT: vucomisd %xmm1, %xmm2 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] +; AVX512F-NEXT: vextractf32x4 $2, %zmm7, %xmm2 +; AVX512F-NEXT: vextractf32x4 $2, %zmm3, %xmm4 +; AVX512F-NEXT: vucomisd %xmm2, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vucomisd %xmm2, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm2 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512F-NEXT: vextractf32x4 $1, %zmm7, %xmm2 +; AVX512F-NEXT: vextractf32x4 $1, %zmm3, %xmm4 +; AVX512F-NEXT: vucomisd %xmm2, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512F-NEXT: vucomisd %xmm2, %xmm4 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm2 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512F-NEXT: vucomisd %xmm7, %xmm3 +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovaq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm7[1,0] +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512F-NEXT: vucomisd %xmm5, %xmm3 +; AVX512F-NEXT: cmovaq %rcx, %rax +; AVX512F-NEXT: vmovq %rax, %xmm3 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v32f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm4, %xmm8 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm0, %xmm9 +; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: vucomisd %xmm8, %xmm9 +; AVX512DQ-NEXT: movq $-1, %rcx +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm8 = xmm8[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512DQ-NEXT: vucomisd %xmm8, %xmm9 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm8 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm10[0],xmm8[0] +; AVX512DQ-NEXT: 
vextractf64x2 $2, %zmm4, %xmm9 +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm0, %xmm10 +; AVX512DQ-NEXT: vucomisd %xmm9, %xmm10 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm11 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] +; AVX512DQ-NEXT: vucomisd %xmm9, %xmm10 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm9 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm4, %xmm9 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm0, %xmm10 +; AVX512DQ-NEXT: vucomisd %xmm9, %xmm10 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm11 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] +; AVX512DQ-NEXT: vucomisd %xmm9, %xmm10 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm9 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm8 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm5, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm1, %xmm0 +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm9 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm0[0] +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm5, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm1, %xmm0 +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm5, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm1, %xmm0 +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm1 +; AVX512DQ-NEXT: 
movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm1 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm8, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm8 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm6, %xmm1 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm2, %xmm4 +; AVX512DQ-NEXT: vucomisd %xmm1, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vucomisd %xmm1, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm6, %xmm4 +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm2, %xmm5 +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm6, %xmm1 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm2, %xmm4 +; AVX512DQ-NEXT: vucomisd %xmm1, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vucomisd %xmm1, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512DQ-NEXT: vucomisd %xmm6, %xmm2 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm2 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm7, %xmm1 +; AVX512DQ-NEXT: vextractf64x2 $3, %zmm3, %xmm2 +; AVX512DQ-NEXT: vucomisd %xmm1, %xmm2 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512DQ-NEXT: vucomisd %xmm1, %xmm2 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] +; AVX512DQ-NEXT: 
vextractf64x2 $2, %zmm7, %xmm2 +; AVX512DQ-NEXT: vextractf64x2 $2, %zmm3, %xmm4 +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm7, %xmm2 +; AVX512DQ-NEXT: vextractf64x2 $1, %zmm3, %xmm4 +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512DQ-NEXT: vucomisd %xmm7, %xmm3 +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovaq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm7[1,0] +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512DQ-NEXT: vucomisd %xmm5, %xmm3 +; AVX512DQ-NEXT: cmovaq %rcx, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm3 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v32f64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vextractf32x4 $3, %zmm4, %xmm8 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm9 +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: vucomisd %xmm8, %xmm9 +; AVX512BW-NEXT: movq $-1, %rcx +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm8 = xmm8[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512BW-NEXT: vucomisd %xmm8, %xmm9 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm8 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm10[0],xmm8[0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm4, %xmm9 +; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm10 +; AVX512BW-NEXT: vucomisd %xmm9, %xmm10 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm11 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] +; AVX512BW-NEXT: vucomisd %xmm9, %xmm10 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm9 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm4, %xmm9 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm0, %xmm10 +; AVX512BW-NEXT: vucomisd %xmm9, %xmm10 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm11 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm9 = xmm9[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm10 = xmm10[1,0] +; 
AVX512BW-NEXT: vucomisd %xmm9, %xmm10 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm9 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm8 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm5, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm1, %xmm0 +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm9 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm0[0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm5, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $2, %zmm1, %xmm0 +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm5, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm1, %xmm0 +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm0 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0] +; AVX512BW-NEXT: vucomisd %xmm5, %xmm1 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512BW-NEXT: vucomisd %xmm5, %xmm1 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm8 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm6, %xmm1 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm2, %xmm4 +; AVX512BW-NEXT: vucomisd %xmm1, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512BW-NEXT: 
vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vucomisd %xmm1, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm6, %xmm4 +; AVX512BW-NEXT: vextractf32x4 $2, %zmm2, %xmm5 +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] +; AVX512BW-NEXT: vucomisd %xmm4, %xmm5 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm6, %xmm1 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm2, %xmm4 +; AVX512BW-NEXT: vucomisd %xmm1, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vucomisd %xmm1, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0] +; AVX512BW-NEXT: vucomisd %xmm6, %xmm2 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm6[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512BW-NEXT: vucomisd %xmm5, %xmm2 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm7, %xmm1 +; AVX512BW-NEXT: vextractf32x4 $3, %zmm3, %xmm2 +; AVX512BW-NEXT: vucomisd %xmm1, %xmm2 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512BW-NEXT: vucomisd %xmm1, %xmm2 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0] +; AVX512BW-NEXT: vextractf32x4 $2, %zmm7, %xmm2 +; AVX512BW-NEXT: vextractf32x4 $2, %zmm3, %xmm4 +; AVX512BW-NEXT: vucomisd %xmm2, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: vucomisd %xmm2, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm7, %xmm2 +; AVX512BW-NEXT: vextractf32x4 $1, %zmm3, %xmm4 +; AVX512BW-NEXT: vucomisd %xmm2, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] +; AVX512BW-NEXT: 
vucomisd %xmm2, %xmm4 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0] +; AVX512BW-NEXT: vucomisd %xmm7, %xmm3 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovaq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm7[1,0] +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] +; AVX512BW-NEXT: vucomisd %xmm5, %xmm3 +; AVX512BW-NEXT: cmovaq %rcx, %rax +; AVX512BW-NEXT: vmovq %rax, %xmm3 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq %1 = fcmp ogt <32 x double> %a0, %a1 ret <32 x i1> %1 } @@ -7639,263 +10278,780 @@ define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { ; AVX2-NEXT: popq %rbp ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v32i64: -; AVX512: # BB#0: -; AVX512-NEXT: vextracti32x4 $3, %zmm4, %xmm8 -; AVX512-NEXT: vpextrq $1, %xmm8, %rcx -; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm9 -; AVX512-NEXT: vpextrq $1, %xmm9, %rdx -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: cmpq %rcx, %rdx -; AVX512-NEXT: movq $-1, %rcx -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vmovq %xmm8, %rdx -; AVX512-NEXT: vmovq %xmm9, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm8 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0] -; AVX512-NEXT: vextracti32x4 $2, %zmm4, %xmm9 -; AVX512-NEXT: vpextrq $1, %xmm9, %rdx -; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm10 -; AVX512-NEXT: vpextrq $1, %xmm10, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm11 -; AVX512-NEXT: vmovq %xmm9, %rdx -; AVX512-NEXT: vmovq %xmm10, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm9 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] -; AVX512-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 -; AVX512-NEXT: vextracti32x4 $1, %zmm4, %xmm9 -; AVX512-NEXT: vpextrq $1, %xmm9, %rdx -; AVX512-NEXT: vextracti32x4 $1, %zmm0, %xmm10 -; AVX512-NEXT: vpextrq $1, %xmm10, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm11 -; AVX512-NEXT: vmovq %xmm9, %rdx -; AVX512-NEXT: vmovq %xmm10, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm9 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] -; AVX512-NEXT: vpextrq $1, %xmm4, %rdx -; AVX512-NEXT: vpextrq $1, %xmm0, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vmovq %xmm4, %rdx -; AVX512-NEXT: vmovq %xmm0, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] -; AVX512-NEXT: vinserti128 $1, %xmm9, 
%ymm0, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm8 -; AVX512-NEXT: vextracti32x4 $3, %zmm5, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rdx -; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm0 -; AVX512-NEXT: vpextrq $1, %xmm0, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm9 -; AVX512-NEXT: vmovq %xmm4, %rdx -; AVX512-NEXT: vmovq %xmm0, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm0[0],xmm9[0] -; AVX512-NEXT: vextracti32x4 $2, %zmm5, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rdx -; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm0 -; AVX512-NEXT: vpextrq $1, %xmm0, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vmovq %xmm4, %rdx -; AVX512-NEXT: vmovq %xmm0, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 -; AVX512-NEXT: vextracti32x4 $1, %zmm5, %xmm0 -; AVX512-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512-NEXT: vextracti32x4 $1, %zmm1, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm10 -; AVX512-NEXT: vmovq %xmm0, %rdx -; AVX512-NEXT: vmovq %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] -; AVX512-NEXT: vpextrq $1, %xmm5, %rdx -; AVX512-NEXT: vpextrq $1, %xmm1, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vmovq %xmm5, %rdx -; AVX512-NEXT: vmovq %xmm1, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm8 -; AVX512-NEXT: vextracti32x4 $3, %zmm6, %xmm1 -; AVX512-NEXT: vpextrq $1, %xmm1, %rdx -; AVX512-NEXT: vextracti32x4 $3, %zmm2, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vmovq %xmm1, %rdx -; AVX512-NEXT: vmovq %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm1 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] -; AVX512-NEXT: vextracti32x4 $2, %zmm6, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rdx -; AVX512-NEXT: vextracti32x4 $2, %zmm2, %xmm5 -; AVX512-NEXT: vpextrq $1, %xmm5, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vmovq %xmm4, %rdx -; AVX512-NEXT: vmovq %xmm5, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; 
AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512-NEXT: vextracti32x4 $1, %zmm6, %xmm0 -; AVX512-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512-NEXT: vextracti32x4 $1, %zmm2, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vmovq %xmm0, %rdx -; AVX512-NEXT: vmovq %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] -; AVX512-NEXT: vpextrq $1, %xmm6, %rdx -; AVX512-NEXT: vpextrq $1, %xmm2, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vmovq %xmm6, %rdx -; AVX512-NEXT: vmovq %xmm2, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm2 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm1 -; AVX512-NEXT: vextracti32x4 $3, %zmm7, %xmm0 -; AVX512-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512-NEXT: vextracti32x4 $3, %zmm3, %xmm2 -; AVX512-NEXT: vpextrq $1, %xmm2, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vmovq %xmm0, %rdx -; AVX512-NEXT: vmovq %xmm2, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] -; AVX512-NEXT: vextracti32x4 $2, %zmm7, %xmm2 -; AVX512-NEXT: vpextrq $1, %xmm2, %rdx -; AVX512-NEXT: vextracti32x4 $2, %zmm3, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vmovq %xmm2, %rdx -; AVX512-NEXT: vmovq %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm2 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm2 -; AVX512-NEXT: vextracti32x4 $1, %zmm7, %xmm0 -; AVX512-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512-NEXT: vextracti32x4 $1, %zmm3, %xmm4 -; AVX512-NEXT: vpextrq $1, %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm5 -; AVX512-NEXT: vmovq %xmm0, %rdx -; AVX512-NEXT: vmovq %xmm4, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] -; AVX512-NEXT: vpextrq $1, %xmm7, %rdx -; AVX512-NEXT: vpextrq $1, %xmm3, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: movl $0, %edx -; AVX512-NEXT: cmovgq %rcx, %rdx -; AVX512-NEXT: vmovq %rdx, %xmm4 -; AVX512-NEXT: vmovq %xmm7, %rdx -; AVX512-NEXT: vmovq %xmm3, %rsi -; AVX512-NEXT: cmpq %rdx, %rsi -; AVX512-NEXT: cmovgq %rcx, %rax -; AVX512-NEXT: vmovq %rax, %xmm3 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] -; AVX512-NEXT: vinserti128 
$1, %xmm0, %ymm3, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v32i64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextracti32x4 $3, %zmm4, %xmm8 +; AVX512F-NEXT: vpextrq $1, %xmm8, %rcx +; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm9 +; AVX512F-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: cmpq %rcx, %rdx +; AVX512F-NEXT: movq $-1, %rcx +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vmovq %xmm8, %rdx +; AVX512F-NEXT: vmovq %xmm9, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm8 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0] +; AVX512F-NEXT: vextracti32x4 $2, %zmm4, %xmm9 +; AVX512F-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm10 +; AVX512F-NEXT: vpextrq $1, %xmm10, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm11 +; AVX512F-NEXT: vmovq %xmm9, %rdx +; AVX512F-NEXT: vmovq %xmm10, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm9 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] +; AVX512F-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 +; AVX512F-NEXT: vextracti32x4 $1, %zmm4, %xmm9 +; AVX512F-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm10 +; AVX512F-NEXT: vpextrq $1, %xmm10, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm11 +; AVX512F-NEXT: vmovq %xmm9, %rdx +; AVX512F-NEXT: vmovq %xmm10, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm9 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] +; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vmovq %xmm4, %rdx +; AVX512F-NEXT: vmovq %xmm0, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512F-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm8 +; AVX512F-NEXT: vextracti32x4 $3, %zmm5, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm0 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm9 +; AVX512F-NEXT: vmovq %xmm4, %rdx +; AVX512F-NEXT: vmovq %xmm0, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm0[0],xmm9[0] +; AVX512F-NEXT: vextracti32x4 $2, %zmm5, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm0 +; AVX512F-NEXT: 
vpextrq $1, %xmm0, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vmovq %xmm4, %rdx +; AVX512F-NEXT: vmovq %xmm0, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512F-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 +; AVX512F-NEXT: vextracti32x4 $1, %zmm5, %xmm0 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm10 +; AVX512F-NEXT: vmovq %xmm0, %rdx +; AVX512F-NEXT: vmovq %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx +; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vmovq %xmm5, %rdx +; AVX512F-NEXT: vmovq %xmm1, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm8 +; AVX512F-NEXT: vextracti32x4 $3, %zmm6, %xmm1 +; AVX512F-NEXT: vpextrq $1, %xmm1, %rdx +; AVX512F-NEXT: vextracti32x4 $3, %zmm2, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vmovq %xmm1, %rdx +; AVX512F-NEXT: vmovq %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] +; AVX512F-NEXT: vextracti32x4 $2, %zmm6, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512F-NEXT: vextracti32x4 $2, %zmm2, %xmm5 +; AVX512F-NEXT: vpextrq $1, %xmm5, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vmovq %xmm4, %rdx +; AVX512F-NEXT: vmovq %xmm5, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-NEXT: vextracti32x4 $1, %zmm6, %xmm0 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512F-NEXT: vextracti32x4 $1, %zmm2, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vmovq %xmm0, %rdx +; AVX512F-NEXT: vmovq %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = 
xmm0[0],xmm5[0] +; AVX512F-NEXT: vpextrq $1, %xmm6, %rdx +; AVX512F-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vmovq %xmm6, %rdx +; AVX512F-NEXT: vmovq %xmm2, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm2 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm1 +; AVX512F-NEXT: vextracti32x4 $3, %zmm7, %xmm0 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512F-NEXT: vextracti32x4 $3, %zmm3, %xmm2 +; AVX512F-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vmovq %xmm0, %rdx +; AVX512F-NEXT: vmovq %xmm2, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512F-NEXT: vextracti32x4 $2, %zmm7, %xmm2 +; AVX512F-NEXT: vpextrq $1, %xmm2, %rdx +; AVX512F-NEXT: vextracti32x4 $2, %zmm3, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vmovq %xmm2, %rdx +; AVX512F-NEXT: vmovq %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm2 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0] +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm2 +; AVX512F-NEXT: vextracti32x4 $1, %zmm7, %xmm0 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512F-NEXT: vextracti32x4 $1, %zmm3, %xmm4 +; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm5 +; AVX512F-NEXT: vmovq %xmm0, %rdx +; AVX512F-NEXT: vmovq %xmm4, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX512F-NEXT: vpextrq $1, %xmm7, %rdx +; AVX512F-NEXT: vpextrq $1, %xmm3, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: movl $0, %edx +; AVX512F-NEXT: cmovgq %rcx, %rdx +; AVX512F-NEXT: vmovq %rdx, %xmm4 +; AVX512F-NEXT: vmovq %xmm7, %rdx +; AVX512F-NEXT: vmovq %xmm3, %rsi +; AVX512F-NEXT: cmpq %rdx, %rsi +; AVX512F-NEXT: cmovgq %rcx, %rax +; AVX512F-NEXT: vmovq %rax, %xmm3 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v32i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm4, %xmm8 +; AVX512DQ-NEXT: vpextrq $1, %xmm8, %rcx +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm0, %xmm9 +; AVX512DQ-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: cmpq %rcx, %rdx +; AVX512DQ-NEXT: movq $-1, %rcx +; AVX512DQ-NEXT: movl 
$0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vmovq %xmm8, %rdx +; AVX512DQ-NEXT: vmovq %xmm9, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm8 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0] +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm4, %xmm9 +; AVX512DQ-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm0, %xmm10 +; AVX512DQ-NEXT: vpextrq $1, %xmm10, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm11 +; AVX512DQ-NEXT: vmovq %xmm9, %rdx +; AVX512DQ-NEXT: vmovq %xmm10, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm9 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm4, %xmm9 +; AVX512DQ-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm10 +; AVX512DQ-NEXT: vpextrq $1, %xmm10, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm11 +; AVX512DQ-NEXT: vmovq %xmm9, %rdx +; AVX512DQ-NEXT: vmovq %xmm10, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm9 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vmovq %xmm4, %rdx +; AVX512DQ-NEXT: vmovq %xmm0, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm8 +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm5, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm1, %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm9 +; AVX512DQ-NEXT: vmovq %xmm4, %rdx +; AVX512DQ-NEXT: vmovq %xmm0, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm0[0],xmm9[0] +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm5, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm1, %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vmovq %xmm4, %rdx +; AVX512DQ-NEXT: vmovq %xmm0, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm5, %xmm0 +; AVX512DQ-NEXT: 
vpextrq $1, %xmm0, %rdx +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm1, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm10 +; AVX512DQ-NEXT: vmovq %xmm0, %rdx +; AVX512DQ-NEXT: vmovq %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vmovq %xmm5, %rdx +; AVX512DQ-NEXT: vmovq %xmm1, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm8, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm8 +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm6, %xmm1 +; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rdx +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm2, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vmovq %xmm1, %rdx +; AVX512DQ-NEXT: vmovq %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm1 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm6, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm2, %xmm5 +; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vmovq %xmm4, %rdx +; AVX512DQ-NEXT: vmovq %xmm5, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm6, %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm2, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vmovq %xmm0, %rdx +; AVX512DQ-NEXT: vmovq %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm6, %rdx +; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vmovq %xmm6, %rdx +; AVX512DQ-NEXT: vmovq %xmm2, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq 
{{.*#+}} xmm2 = xmm2[0],xmm4[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm1 +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm7, %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512DQ-NEXT: vextracti64x2 $3, %zmm3, %xmm2 +; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vmovq %xmm0, %rdx +; AVX512DQ-NEXT: vmovq %xmm2, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm7, %xmm2 +; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rdx +; AVX512DQ-NEXT: vextracti64x2 $2, %zmm3, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vmovq %xmm2, %rdx +; AVX512DQ-NEXT: vmovq %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm2 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm2 +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm7, %xmm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512DQ-NEXT: vextracti64x2 $1, %zmm3, %xmm4 +; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm5 +; AVX512DQ-NEXT: vmovq %xmm0, %rdx +; AVX512DQ-NEXT: vmovq %xmm4, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX512DQ-NEXT: vpextrq $1, %xmm7, %rdx +; AVX512DQ-NEXT: vpextrq $1, %xmm3, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: movl $0, %edx +; AVX512DQ-NEXT: cmovgq %rcx, %rdx +; AVX512DQ-NEXT: vmovq %rdx, %xmm4 +; AVX512DQ-NEXT: vmovq %xmm7, %rdx +; AVX512DQ-NEXT: vmovq %xmm3, %rsi +; AVX512DQ-NEXT: cmpq %rdx, %rsi +; AVX512DQ-NEXT: cmovgq %rcx, %rax +; AVX512DQ-NEXT: vmovq %rax, %xmm3 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v32i64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vextracti32x4 $3, %zmm4, %xmm8 +; AVX512BW-NEXT: vpextrq $1, %xmm8, %rcx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm9 +; AVX512BW-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: cmpq %rcx, %rdx +; AVX512BW-NEXT: movq $-1, %rcx +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vmovq %xmm8, %rdx +; AVX512BW-NEXT: vmovq %xmm9, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm8 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0] +; AVX512BW-NEXT: vextracti32x4 $2, %zmm4, 
%xmm9 +; AVX512BW-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm10 +; AVX512BW-NEXT: vpextrq $1, %xmm10, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm11 +; AVX512BW-NEXT: vmovq %xmm9, %rdx +; AVX512BW-NEXT: vmovq %xmm10, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm9 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm4, %xmm9 +; AVX512BW-NEXT: vpextrq $1, %xmm9, %rdx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm10 +; AVX512BW-NEXT: vpextrq $1, %xmm10, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm11 +; AVX512BW-NEXT: vmovq %xmm9, %rdx +; AVX512BW-NEXT: vmovq %xmm10, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm9 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0] +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vmovq %xmm4, %rdx +; AVX512BW-NEXT: vmovq %xmm0, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm8 +; AVX512BW-NEXT: vextracti32x4 $3, %zmm5, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm0 +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm9 +; AVX512BW-NEXT: vmovq %xmm4, %rdx +; AVX512BW-NEXT: vmovq %xmm0, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm0[0],xmm9[0] +; AVX512BW-NEXT: vextracti32x4 $2, %zmm5, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm0 +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vmovq %xmm4, %rdx +; AVX512BW-NEXT: vmovq %xmm0, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm5, %xmm0 +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm10 +; AVX512BW-NEXT: vmovq %xmm0, %rdx +; AVX512BW-NEXT: vmovq %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; 
AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0] +; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx +; AVX512BW-NEXT: vpextrq $1, %xmm1, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vmovq %xmm5, %rdx +; AVX512BW-NEXT: vmovq %xmm1, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm8 +; AVX512BW-NEXT: vextracti32x4 $3, %zmm6, %xmm1 +; AVX512BW-NEXT: vpextrq $1, %xmm1, %rdx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vmovq %xmm1, %rdx +; AVX512BW-NEXT: vmovq %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] +; AVX512BW-NEXT: vextracti32x4 $2, %zmm6, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm5 +; AVX512BW-NEXT: vpextrq $1, %xmm5, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vmovq %xmm4, %rdx +; AVX512BW-NEXT: vmovq %xmm5, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm6, %xmm0 +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm2, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vmovq %xmm0, %rdx +; AVX512BW-NEXT: vmovq %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX512BW-NEXT: vpextrq $1, %xmm6, %rdx +; AVX512BW-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vmovq %xmm6, %rdx +; AVX512BW-NEXT: vmovq %xmm2, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm1 +; AVX512BW-NEXT: vextracti32x4 $3, %zmm7, %xmm0 +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2 +; AVX512BW-NEXT: vpextrq $1, %xmm2, %rsi +; AVX512BW-NEXT: cmpq %rdx, 
%rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vmovq %xmm0, %rdx +; AVX512BW-NEXT: vmovq %xmm2, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512BW-NEXT: vextracti32x4 $2, %zmm7, %xmm2 +; AVX512BW-NEXT: vpextrq $1, %xmm2, %rdx +; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vmovq %xmm2, %rdx +; AVX512BW-NEXT: vmovq %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm2 +; AVX512BW-NEXT: vextracti32x4 $1, %zmm7, %xmm0 +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rdx +; AVX512BW-NEXT: vextracti32x4 $1, %zmm3, %xmm4 +; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm5 +; AVX512BW-NEXT: vmovq %xmm0, %rdx +; AVX512BW-NEXT: vmovq %xmm4, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX512BW-NEXT: vpextrq $1, %xmm7, %rdx +; AVX512BW-NEXT: vpextrq $1, %xmm3, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: cmovgq %rcx, %rdx +; AVX512BW-NEXT: vmovq %rdx, %xmm4 +; AVX512BW-NEXT: vmovq %xmm7, %rdx +; AVX512BW-NEXT: vmovq %xmm3, %rsi +; AVX512BW-NEXT: cmpq %rdx, %rsi +; AVX512BW-NEXT: cmovgq %rcx, %rax +; AVX512BW-NEXT: vmovq %rax, %xmm3 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <32 x i64> %a0, %a1 ret <32 x i1> %1 } diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll index 39fbc7611de8..774d615ae896 100644 --- a/test/CodeGen/X86/vector-sext.ll +++ b/test/CodeGen/X86/vector-sext.ll @@ -1244,8 +1244,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: movzbl (%rdi), %eax ; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512F-NEXT: retq ; @@ -1253,8 +1252,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { ; AVX512BW: # BB#0: # %entry ; AVX512BW-NEXT: movzbl (%rdi), %eax ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512BW-NEXT: retq ; @@ -1435,8 +1433,7 @@ define <4 x i32> 
@load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: movzbl (%rdi), %eax ; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 ; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; AVX512F-NEXT: retq @@ -1445,8 +1442,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; AVX512BW: # BB#0: # %entry ; AVX512BW-NEXT: movzbl (%rdi), %eax ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; AVX512BW-NEXT: retq @@ -1642,8 +1638,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: movzbl (%rdi), %eax ; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq ; @@ -1651,8 +1646,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; AVX512BW: # BB#0: # %entry ; AVX512BW-NEXT: movzbl (%rdi), %eax ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; AVX512BW-NEXT: retq ; @@ -1945,8 +1939,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: movzbl (%rdi), %eax ; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: retq ; @@ -1954,8 +1947,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; AVX512BW: # BB#0: # %entry ; AVX512BW-NEXT: movzbl (%rdi), %eax ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512BW-NEXT: retq ; @@ -2348,8 +2340,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; AVX512F: # BB#0: # %entry ; AVX512F-NEXT: movzbl (%rdi), %eax ; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -2357,8 +2348,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; AVX512BW: # BB#0: # %entry ; AVX512BW-NEXT: movzbl (%rdi), %eax ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; @@ -2860,8 +2850,7 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { ; AVX512-LABEL: load_sext_16i1_to_16i8: ; AVX512: # BB#0: # %entry ; AVX512-NEXT: kmovw (%rdi), %k1 -; AVX512-NEXT: vpternlogd $255, 
%zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: retq ; @@ -3398,8 +3387,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; AVX512-LABEL: load_sext_16i1_to_16i16: ; AVX512: # BB#0: # %entry ; AVX512-NEXT: kmovw (%rdi), %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512-NEXT: retq ; @@ -4244,12 +4232,11 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { ; AVX512: # BB#0: # %entry ; AVX512-NEXT: kmovw (%rdi), %k1 ; AVX512-NEXT: kmovw 2(%rdi), %k2 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k2} {z} +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} +; AVX512-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; X32-SSE41-LABEL: load_sext_32i1_to_32i8: diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll index 27b65b829923..440faa689fb8 100644 --- a/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW ; @@ -321,13 +322,22 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; XOP-NEXT: vpshaw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; -; AVX512-LABEL: var_shift_v8i16: -; AVX512: # BB#0: -; AVX512-NEXT: # kill: %XMM1 %XMM1 %ZMM1 -; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512-NEXT: retq +; AVX512DQ-LABEL: var_shift_v8i16: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512DQ-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX512DQ-NEXT: vpsravd %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: var_shift_v8i16: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512BW-NEXT: retq ; ; X32-SSE-LABEL: var_shift_v8i16: ; X32-SSE: # BB#0: @@ -499,30 +509,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512-LABEL: 
var_shift_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1 -; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4 -; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4 -; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3 -; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3 -; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: retq ; ; X32-SSE-LABEL: var_shift_v16i8: @@ -911,30 +901,10 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; AVX512-LABEL: splatvar_shift_v16i8: ; AVX512: # BB#0: ; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1 -; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4 -; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4 -; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3 -; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3 -; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: 
vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: retq ; ; X32-SSE-LABEL: splatvar_shift_v16i8: @@ -1221,13 +1191,21 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; XOP-NEXT: vpshaw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; -; AVX512-LABEL: constant_shift_v8i16: -; AVX512: # BB#0: -; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] -; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512-NEXT: retq +; AVX512DQ-LABEL: constant_shift_v8i16: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: constant_shift_v8i16: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] +; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512BW-NEXT: retq ; ; X32-SSE-LABEL: constant_shift_v8i16: ; X32-SSE: # BB#0: @@ -1384,31 +1362,9 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; ; AVX512-LABEL: constant_shift_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] -; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1 -; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4 -; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4 -; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3 -; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3 -; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: retq ; ; X32-SSE-LABEL: 
constant_shift_v16i8: diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll index ee1879b6696e..79902acfec24 100644 --- a/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW ; ; Variable Shifts @@ -212,13 +213,21 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; -; AVX512-LABEL: var_shift_v16i16: -; AVX512: # BB#0: -; AVX512-NEXT: # kill: %YMM1 %YMM1 %ZMM1 -; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; AVX512-NEXT: retq +; AVX512DQ-LABEL: var_shift_v16i16: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: var_shift_v16i16: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: # kill: %YMM1 %YMM1 %ZMM1 +; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512BW-NEXT: retq %shift = ashr <16 x i16> %a, %b ret <16 x i16> %shift } @@ -331,33 +340,41 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; -; AVX512-LABEL: var_shift_v32i8: -; AVX512: # BB#0: -; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4 -; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 -; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4 -; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2 -; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 -; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4 -; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2 -; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 -; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3 -; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3 -; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1 -; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3 -; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1 -; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512DQ-LABEL: var_shift_v32i8: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] +; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 +; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 +; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 +; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] +; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] +; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm3 +; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm3 +; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm3 +; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: var_shift_v32i8: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero +; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 +; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq %shift = ashr <32 x i8> %a, %b ret <32 x i8> %shift } 
@@ -608,34 +625,43 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
-; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
-; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
-; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -804,13 +830,20 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v16i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v16i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v16i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: retq
%shift = ashr <16 x i16> %a,
ret <16 x i16> %shift
}
@@ -913,34 +946,41 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
-; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
-; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
-; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%shift = ashr <32 x i8> %a,
ret <32 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll
index 1280641c557b..2c9e433cfb2c 100644
--- a/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -26,25 +26,14 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
-; AVX512DQ-NEXT: vpsravd %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
-; AVX512DQ-NEXT: vpsravd %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
-; AVX512DQ-NEXT: vpsravd %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
-; AVX512DQ-NEXT: vpsravd %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
+; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vpsravd %zmm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
+; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512DQ-NEXT: vpsravd %zmm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i16:
@@ -1025,24 +1014,13 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX512DQ-NEXT: vpsravd %ymm4, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; AVX512DQ-NEXT: vpsravd %ymm3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
-; AVX512DQ-NEXT: vpsravd %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vpsrld $16, %ymm4, %ymm4
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
-; AVX512DQ-NEXT: vpsravd %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpackusdw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpsravd %zmm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512DQ-NEXT: vpsravd %zmm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
diff --git a/test/CodeGen/X86/vector-shift-lshr-128.ll b/test/CodeGen/X86/vector-shift-lshr-128.ll
index 42488f2ec3a7..a7e1a531b659 100644
--- a/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
@@ -290,13 +291,22 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: var_shift_v8i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %XMM1 %XMM1 %ZMM1
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: var_shift_v8i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQ-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v8i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %XMM1 %XMM1 %ZMM1
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
; X32-SSE: # BB#0:
@@ -417,18 +427,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $2, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v16i8:
@@ -701,18 +703,10 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $2, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: splatvar_shift_v16i8:
@@ -955,13 +949,21 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v8i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
-; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v8i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v8i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:
; X32-SSE: # BB#0:
@@ -1064,19 +1066,9 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $2, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v16i8:
diff --git a/test/CodeGen/X86/vector-shift-lshr-256.ll b/test/CodeGen/X86/vector-shift-lshr-256.ll
index 5223d7bba353..25667e7d1661 100644
--- a/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Variable Shifts
@@ -189,13 +190,21 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: var_shift_v16i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %YMM1 %YMM1 %ZMM1
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: var_shift_v16i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v16i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM1 %YMM1 %ZMM1
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: retq
%shift = lshr <16 x i16> %a, %b
ret <16 x i16> %shift
}
@@ -275,21 +284,29 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: var_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: var_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%shift = lshr <32 x i8> %a, %b
ret <32 x i8> %shift
}
@@ -490,22 +507,31 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -659,13 +685,20 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v16i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v16i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v16i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: retq
%shift = lshr <16 x i16> %a,
ret <16 x i16> %shift
}
@@ -739,22 +772,29 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%shift = lshr <32 x i8> %a,
ret <32 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-lshr-512.ll b/test/CodeGen/X86/vector-shift-lshr-512.ll
index 4c3caf329fb7..3da8f9437e57 100644
--- a/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -27,25 +27,14 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
-; AVX512DQ-NEXT: vpsrlvd %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
-; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
-; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
-; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQ-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512DQ-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i16:
@@ -988,24 +977,13 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
-; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vpsrld $16, %ymm4, %ymm4
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
-; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpackusdw %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512DQ-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
diff --git a/test/CodeGen/X86/vector-shift-shl-128.ll b/test/CodeGen/X86/vector-shift-shl-128.ll
index 5c89949e924b..8706078b40c9 100644
--- a/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
@@ -245,13 +246,22 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: var_shift_v8i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %XMM1 %XMM1 %ZMM1
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: var_shift_v8i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %YMM0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v8i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %XMM1 %XMM1 %ZMM1
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
; X32-SSE: # BB#0:
@@ -367,17 +377,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v16i8:
@@ -642,17 +645,10 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: splatvar_shift_v16i8:
@@ -827,13 +823,18 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v8i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
-; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v8i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v8i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512BW-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:
; X32-SSE: # BB#0:
@@ -919,18 +920,9 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm2
-; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v16i8:
diff --git a/test/CodeGen/X86/vector-shift-shl-256.ll b/test/CodeGen/X86/vector-shift-shl-256.ll
index eb52ae3ccaca..a1ef2791c1b0 100644
--- a/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
@@ -164,13 +165,21 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: var_shift_v16i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %YMM1 %YMM1 %ZMM1
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: var_shift_v16i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v16i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM1 %YMM1 %ZMM1
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: retq
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
}
@@ -240,20 +249,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: var_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: var_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%shift = shl <32 x i8> %a, %b
ret <32 x i8> %shift
}
@@ -446,21 +463,30 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -571,13 +597,18 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; XOPAVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v16i16:
-; AVX512: # BB#0:
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v16i16:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v16i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0
+; AVX512BW-NEXT: retq
%shift = shl <16 x i16> %a,
ret <16 x i16> %shift
}
@@ -645,21 +676,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: constant_shift_v32i8:
-; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX512-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
-; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: constant_shift_v32i8:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
%shift = shl <32 x i8> %a,
ret <32 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-shl-512.ll b/test/CodeGen/X86/vector-shift-shl-512.ll
index 520c3237a57f..b9c9b56427f1 100644
--- a/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -27,25 +27,14 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
-; AVX512DQ-NEXT: vpsllvd %ymm5, %ymm6, %ymm5
-; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
-; AVX512DQ-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
-; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
-; AVX512DQ-NEXT: vpsllvd %ymm2, %ymm5, %ymm2
-; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
-; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
-; AVX512DQ-NEXT: vpsllvd %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512DQ-NEXT: vpsllvd %zmm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
+; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512DQ-NEXT: vpsllvd %zmm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i16:
diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 2836d69a0fec..f4650ec741a7 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -178,13 +178,8 @@ define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_
;
; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; AVX512BW-NEXT: vpshufb %ymm1, %ymm0, %ymm2
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm2[2,3,0,1]
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512BW-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48]
+; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
diff --git a/test/CodeGen/X86/vector-shuffle-masked.ll b/test/CodeGen/X86/vector-shuffle-masked.ll
index 04d6b3733246..37fd022999e4 100644
--- a/test/CodeGen/X86/vector-shuffle-masked.ll
+++ b/test/CodeGen/X86/vector-shuffle-masked.ll
@@ -216,7 +216,8 @@ define <8 x i32>
@mask_shuffle_v8i32_23456701(<8 x i32> %a, <8 x i32> %passthru, ; CHECK: # BB#0: ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -686,3 +687,33 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x doub %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru ret <2 x double> %res } + +define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) { +; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask: +; CHECK: # BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] +; CHECK-NEXT: retq + %q = load double, double* %x, align 1 + %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 + %mask.cast = bitcast i8 %mask to <8 x i1> + %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> + %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru + ret <2 x double> %res +} + +define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) { +; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz: +; CHECK: # BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] +; CHECK-NEXT: retq + %q = load double, double* %x, align 1 + %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 + %mask.cast = bitcast i8 %mask to <8 x i1> + %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> + %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer + ret <2 x double> %res +} diff --git a/test/CodeGen/X86/vector-shuffle-v1.ll b/test/CodeGen/X86/vector-shuffle-v1.ll index 3ad92737a2ef..4312b67546d2 100644 --- a/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/test/CodeGen/X86/vector-shuffle-v1.ll @@ -71,13 +71,12 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> % ; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0: ; AVX512F: # BB#0: ; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z} -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0] -; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1 -; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0] +; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: retq ; @@ -101,14 +100,13 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1 ; AVX512F: # BB#0: ; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z} -; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} -; 
AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] -; AVX512F-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; AVX512F-NEXT: vpslld $31, %zmm3, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; AVX512F-NEXT: vpslld $31, %zmm2, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: retq ; @@ -157,13 +155,12 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) { ; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z} -; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1 -; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1 -; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: retq ; @@ -185,8 +182,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) { ; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u> ; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 @@ -215,8 +211,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) { ; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1] ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 @@ -241,8 +236,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) { ; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0] ; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 @@ -271,8 +265,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) { ; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7] ; AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2 @@ -301,13 +294,12 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) { ; 
AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: movb $51, %al ; AVX512F-NEXT: kmovw %eax, %k2 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z} -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1] -; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 +; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax @@ -337,10 +329,10 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) { ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z} -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7] -; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7] +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 +; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax @@ -367,8 +359,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) { ; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0: ; AVX512F: # BB#0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -403,9 +394,8 @@ define i64 @shuf64i1_zero(i64 %a) { ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $96, %rsp ; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 diff --git a/test/ExecutionEngine/Interpreter/lit.local.cfg b/test/ExecutionEngine/Interpreter/lit.local.cfg index 8cbaf03217d5..231d8e22cc6f 100644 --- a/test/ExecutionEngine/Interpreter/lit.local.cfg +++ b/test/ExecutionEngine/Interpreter/lit.local.cfg @@ -1,3 +1,3 @@ # These tests require foreign function calls -if config.enable_ffi != "ON": +if not config.enable_ffi: config.unsupported = True diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-relocations.s index 3ba95e4d394b..a9ec00939504 100644 --- a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-relocations.s +++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-relocations.s @@ -1,6 +1,11 @@ # RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -filetype=obj -o %T/be-reloc.o %s # RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -dummy-extern f=0x0123456789abcdef -check=%s %T/be-reloc.o + .globl Q + .section .dummy, "ax" +Q: + nop + .text .globl g .p2align 2 @@ -23,8 +28,11 @@ g: .globl k .p2align 3 k: - .xword f + .xword f .size 
k, 8 +r: +# R_AARCH64_PREL32: use Q instead of f to fit in 32 bits. + .word Q - . # LE instructions read as BE # rtdyld-check: *{4}(g) = 0x6024e0d2 @@ -32,3 +40,4 @@ k: # rtdyld-check: *{4}(g + 8) = 0x6035b1f2 # rtdyld-check: *{4}(g + 12) = 0xe0bd99f2 # rtdyld-check: *{8}k = f +# rtdyld-check: *{4}r = (Q - r)[31:0] diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_local_branch.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_local_branch.s new file mode 100644 index 000000000000..679930a14e06 --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_local_branch.s @@ -0,0 +1,14 @@ +# RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -o %T/branch.o %s +# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -check=%s %T/branch.o + +.globl _main +.weak _label1 + +.section .text.1,"ax" +_label1: + nop +_main: + b _label1 + +## Branch 1 instruction back from _main +# rtdyld-check: *{4}(_main) = 0x17ffffff diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_relocations.s index c57234a906e3..f9a03ab40667 100644 --- a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_relocations.s +++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_relocations.s @@ -1,6 +1,11 @@ # RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -o %T/reloc.o %s # RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -dummy-extern f=0x0123456789abcdef -check=%s %T/reloc.o - + + .globl Q + .section .dummy, "ax" +Q: + nop + .text .globl g .p2align 2 @@ -14,6 +19,18 @@ g: movk x0, #:abs_g1_nc:f # R_AARCH64_MOVW_UABS_G0_NC movk x0, #:abs_g0_nc:f +l: +# R_AARCH64_LDST32_ABS_LO12_NC + ldr s4, [x5, :lo12:a] +# R_AARCH64_LDST64_ABS_LO12_NC + ldr x4, [x5, :lo12:a] +p: +# R_AARCH64_ADR_PREL_PG_HI21 +# Test both low and high immediate values + adrp x4, a + 20480 # 16384 + 4096 +# Align next label to 16 bytes, so that LDST immediate +# fields will be non-zero + .align 4 a: # R_AARCH64_ADD_ABS_LO12_NC add x0, x0, :lo12:f @@ -27,13 +44,27 @@ a: .p2align 3 k: .xword f - .size k, 8 + .size k, 16 +r: +# R_AARCH64_PREL32: use Q instead of f to fit in 32 bits. + .word Q - . # rtdyld-check: *{4}(g) = 0xd2e02460 # rtdyld-check: *{4}(g + 4) = 0xf2c8ace0 # rtdyld-check: *{4}(g + 8) = 0xf2b13560 # rtdyld-check: *{4}(g + 12) = 0xf299bde0 + +## Check LDST32_ABS_LO12_NC and LDST64_ABS_LO12_NC +# rtdyld-check: (*{4}l)[21:10] = a[11:2] +# rtdyld-check: (*{4}(l+4))[21:10] = a[11:3] + +## Check ADR_PREL_PG_HI21. Low order bits of immediate value +## go to bits 30:29. 
High order bits go to bits 23:5 +# rtdyld-check: (*{4}p)[30:29] = (a - p + 20480)[13:12] +# rtdyld-check: (*{4}p)[23:5] = (a - p + 20480)[32:14] + # rtdyld-check: *{8}k = f +# rtdyld-check: *{4}r = (Q - r)[31:0] ## f & 0xFFF = 0xdef (bits 11:0 of f) ## 0xdef << 10 = 0x37bc00 diff --git a/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll b/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll index 9b0c1ef9b5e0..af4da14d786f 100644 --- a/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll +++ b/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll @@ -22,7 +22,7 @@ target triple = "x86_64-apple-macosx10.11.0" ; CHECK: @__asan_binder_global = internal global {{.*}} @global {{.*}} [[METADATA]] {{.*}} section "__DATA,__asan_liveness,regular,live_support" ; Test that there is the flag global variable: -; CHECK: @__asan_globals_registered = common global i64 0 +; CHECK: @__asan_globals_registered = common hidden global i64 0 ; The binder has to be inserted to llvm.compiler.used to avoid being stripped ; during LTO. diff --git a/test/JitListener/lit.local.cfg b/test/JitListener/lit.local.cfg index 05f34a744ad6..f485229b01c2 100644 --- a/test/JitListener/lit.local.cfg +++ b/test/JitListener/lit.local.cfg @@ -1,3 +1,3 @@ -if not config.root.llvm_use_intel_jitevents == "true": +if not config.root.llvm_use_intel_jitevents: config.unsupported = True diff --git a/test/ThinLTO/X86/Inputs/funcimport-tbaa.ll b/test/ThinLTO/X86/Inputs/funcimport-tbaa.ll new file mode 100644 index 000000000000..72aea1e5e252 --- /dev/null +++ b/test/ThinLTO/X86/Inputs/funcimport-tbaa.ll @@ -0,0 +1,11 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + + +define i32 @main() { +entry: + %unused = call float @globalfunc1(i32* null, float*null) + ret i32 0 +} + +declare float @globalfunc1(i32*, float*) \ No newline at end of file diff --git a/test/ThinLTO/X86/Inputs/local_name_conflict1.ll b/test/ThinLTO/X86/Inputs/local_name_conflict1.ll new file mode 100644 index 000000000000..2ef7bdd3eb7b --- /dev/null +++ b/test/ThinLTO/X86/Inputs/local_name_conflict1.ll @@ -0,0 +1,17 @@ +; ModuleID = 'local_name_conflict.o' +source_filename = "local_name_conflict.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @a() { +entry: + %call = call i32 @foo() + ret i32 %call +} + +; Function Attrs: noinline nounwind uwtable +define internal i32 @foo() { +entry: + ret i32 1 +} diff --git a/test/ThinLTO/X86/Inputs/local_name_conflict2.ll b/test/ThinLTO/X86/Inputs/local_name_conflict2.ll new file mode 100644 index 000000000000..a8c20a29228a --- /dev/null +++ b/test/ThinLTO/X86/Inputs/local_name_conflict2.ll @@ -0,0 +1,17 @@ +; ModuleID = 'local_name_conflict.o' +source_filename = "local_name_conflict.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @b() { +entry: + %call = call i32 @foo() + ret i32 %call +} + +; Function Attrs: noinline nounwind uwtable +define internal i32 @foo() { +entry: + ret i32 2 +} diff --git a/test/ThinLTO/X86/funcimport-tbaa.ll b/test/ThinLTO/X86/funcimport-tbaa.ll new file mode 100644 index 000000000000..c3dfd7d90b00 --- /dev/null +++ b/test/ThinLTO/X86/funcimport-tbaa.ll @@ -0,0 +1,38 @@ +; We generate invalid TBAA, hence -disable-verify, but this is a convenient way +; to trigger a 
metadata lazyloading crash
+
+; RUN: opt -module-summary %s -o %t.bc -bitcode-mdindex-threshold=0 -disable-verify
+; RUN: opt -module-summary %p/Inputs/funcimport-tbaa.ll -o %t2.bc
+; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc
+
+
+; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc -o - \
+; RUN: | llvm-dis -o - | FileCheck %s --check-prefix=IMPORTGLOB1
+; IMPORTGLOB1: define available_externally float @globalfunc1
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define float @globalfunc1(i32*, float*) {
+  %3 = load i32, i32* %0, align 4, !tbaa !0
+  %4 = sitofp i32 %3 to float
+  %5 = load float, float* %1, align 4, !tbaa !4
+  %6 = fadd float %4, %5
+  ret float %6
+}
+
+; We need a second function to force the metadata to be emitted in the global block
+define float @globalfunc2(i32*, float*) {
+  %3 = load i32, i32* %0, align 4, !tbaa !0
+  %4 = sitofp i32 %3 to float
+  %5 = load float, float* %1, align 4, !tbaa !4
+  %6 = fadd float %4, %5
+  ret float %6
+}
+
+!0 = !{!1, !4, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"float", !2, i64 0}
diff --git a/test/ThinLTO/X86/local_name_conflict.ll b/test/ThinLTO/X86/local_name_conflict.ll
new file mode 100644
index 000000000000..9cbb32ecf211
--- /dev/null
+++ b/test/ThinLTO/X86/local_name_conflict.ll
@@ -0,0 +1,29 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: opt -module-summary -module-hash %s -o %t.bc
+; RUN: opt -module-summary -module-hash %p/Inputs/local_name_conflict1.ll -o %t2.bc
+; RUN: opt -module-summary -module-hash %p/Inputs/local_name_conflict2.ll -o %t3.bc
+; RUN: llvm-lto -thinlto-action=thinlink -o %t4.bc %t.bc %t2.bc %t3.bc
+
+; Make sure foo is promoted and renamed without complaint in both
+; Inputs/local_name_conflict1.ll and Inputs/local_name_conflict2.ll
+; FIXME: Once the importer is fixed to import the correct copy of the
+; local, we should be able to verify that via an import action.
+; RUN: llvm-lto -thinlto-action=promote %t2.bc -thinlto-index=%t4.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTSTATIC
+; RUN: llvm-lto -thinlto-action=promote %t3.bc -thinlto-index=%t4.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTSTATIC
+; EXPORTSTATIC: define hidden i32 @foo.llvm.
+
+; ModuleID = 'local_name_conflict_main.o'
+source_filename = "local_name_conflict_main.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  %call = call i32 (...) @b()
+  ret i32 %call
+}
+
+declare i32 @b(...)
diff --git a/test/Transforms/GVN/invariant.group.ll b/test/Transforms/GVN/invariant.group.ll
index 026671a5bdf4..d0b32d7f3dd8 100644
--- a/test/Transforms/GVN/invariant.group.ll
+++ b/test/Transforms/GVN/invariant.group.ll
@@ -344,11 +344,63 @@ _Z1gR1A.exit: ; preds = %0, %5
 ret void
 }
 
+; Check if no optimizations are performed with global pointers.
+; FIXME: we could do the optimizations if we would check if dependency comes
+; from the same function.
+; CHECK-LABEL: define void @testGlobal() { +define void @testGlobal() { +; CHECK: %a = load i8, i8* @unknownPtr, !invariant.group !0 + %a = load i8, i8* @unknownPtr, !invariant.group !0 + call void @foo2(i8* @unknownPtr, i8 %a) +; CHECK: %1 = load i8, i8* @unknownPtr, !invariant.group !0 + %1 = load i8, i8* @unknownPtr, !invariant.group !0 + call void @bar(i8 %1) + + %b0 = bitcast i8* @unknownPtr to i1* + call void @fooBit(i1* %b0, i1 1) +; Adding regex because of canonicalization of bitcasts +; CHECK: %2 = load i1, i1* {{.*}}, !invariant.group !0 + %2 = load i1, i1* %b0, !invariant.group !0 + call void @fooBit(i1* %b0, i1 %2) +; CHECK: %3 = load i1, i1* {{.*}}, !invariant.group !0 + %3 = load i1, i1* %b0, !invariant.group !0 + call void @fooBit(i1* %b0, i1 %3) + ret void +} +; And in the case it is not global +; CHECK-LABEL: define void @testNotGlobal() { +define void @testNotGlobal() { + %a = alloca i8 + call void @foo(i8* %a) +; CHECK: %b = load i8, i8* %a, !invariant.group !0 + %b = load i8, i8* %a, !invariant.group !0 + call void @foo2(i8* %a, i8 %b) + + %1 = load i8, i8* %a, !invariant.group !0 +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %1) + + %b0 = bitcast i8* %a to i1* + call void @fooBit(i1* %b0, i1 1) +; CHECK: %trunc = trunc i8 %b to i1 + %2 = load i1, i1* %b0, !invariant.group !0 +; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %trunc) + call void @fooBit(i1* %b0, i1 %2) + %3 = load i1, i1* %b0, !invariant.group !0 +; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %trunc) + call void @fooBit(i1* %b0, i1 %3) + ret void +} + + declare void @foo(i8*) +declare void @foo2(i8*, i8) declare void @bar(i8) declare i8* @getPointer(i8*) declare void @_ZN1A3fooEv(%struct.A*) declare void @_ZN1AC1Ev(%struct.A*) +declare void @fooBit(i1*, i1) + declare i8* @llvm.invariant.group.barrier(i8*) ; Function Attrs: nounwind diff --git a/test/Transforms/InstCombine/assume.ll b/test/Transforms/InstCombine/assume.ll index 7987aa242319..6e690426db99 100644 --- a/test/Transforms/InstCombine/assume.ll +++ b/test/Transforms/InstCombine/assume.ll @@ -2,7 +2,6 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: nounwind uwtable define i32 @foo1(i32* %a) #0 { entry: %0 = load i32, i32* %a, align 4 @@ -22,7 +21,6 @@ entry: ret i32 %0 } -; Function Attrs: nounwind uwtable define i32 @foo2(i32* %a) #0 { entry: ; Same check as in @foo1, but make sure it works if the assume is first too. 
@@ -40,7 +38,6 @@ entry: ret i32 %0 } -; Function Attrs: nounwind declare void @llvm.assume(i1) #1 define i32 @simple(i32 %a) #1 { @@ -55,7 +52,6 @@ entry: ret i32 %a } -; Function Attrs: nounwind uwtable define i32 @can1(i1 %a, i1 %b, i1 %c) { entry: %and1 = and i1 %a, %b @@ -71,7 +67,6 @@ entry: ret i32 5 } -; Function Attrs: nounwind uwtable define i32 @can2(i1 %a, i1 %b, i1 %c) { entry: %v = or i1 %a, %b @@ -103,7 +98,6 @@ entry: ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @bar2(i32 %a) #0 { entry: ; CHECK-LABEL: @bar2 @@ -118,7 +112,6 @@ entry: ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 { entry: %and1 = and i32 %a, 3 @@ -139,7 +132,6 @@ entry: ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @bar4(i32 %a, i32 %b) { entry: %and1 = and i32 %b, 3 @@ -160,30 +152,41 @@ entry: } define i32 @icmp1(i32 %a) #0 { -entry: +; CHECK-LABEL: @icmp1( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 1 +; %cmp = icmp sgt i32 %a, 5 tail call void @llvm.assume(i1 %cmp) %conv = zext i1 %cmp to i32 ret i32 %conv - -; CHECK-LABEL: @icmp1 -; CHECK: call void @llvm.assume -; CHECK: ret i32 1 - } -; Function Attrs: nounwind uwtable define i32 @icmp2(i32 %a) #0 { -entry: +; CHECK-LABEL: @icmp2( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 0 +; %cmp = icmp sgt i32 %a, 5 tail call void @llvm.assume(i1 %cmp) - %0 = zext i1 %cmp to i32 - %lnot.ext = xor i32 %0, 1 + %t0 = zext i1 %cmp to i32 + %lnot.ext = xor i32 %t0, 1 ret i32 %lnot.ext +} -; CHECK-LABEL: @icmp2 -; CHECK: call void @llvm.assume -; CHECK: ret i32 0 +; FIXME: If the 'not' of a condition is known true, then the condition must be false. 
+ +define i1 @assume_not(i1 %cond) { +; CHECK-LABEL: @assume_not( +; CHECK-NEXT: [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[NOTCOND]]) +; CHECK-NEXT: ret i1 [[COND]] +; + %notcond = xor i1 %cond, true + call void @llvm.assume(i1 %notcond) + ret i1 %cond } declare void @escape(i32* %a) diff --git a/test/Transforms/InstCombine/assume2.ll b/test/Transforms/InstCombine/assume2.ll index c41bbaa04eb7..e8fbc049f41a 100644 --- a/test/Transforms/InstCombine/assume2.ll +++ b/test/Transforms/InstCombine/assume2.ll @@ -1,170 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: nounwind declare void @llvm.assume(i1) #1 -; Function Attrs: nounwind uwtable define i32 @test1(i32 %a) #0 { -entry: -; CHECK-LABEL: @test1 -; CHECK: call void @llvm.assume -; CHECK: ret i32 5 - +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 5 +; %and = and i32 %a, 15 %cmp = icmp eq i32 %and, 5 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 7 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test2(i32 %a) #0 { -entry: -; CHECK-LABEL: @test2 -; CHECK: call void @llvm.assume -; CHECK: ret i32 2 - +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[A_NOT:%.*]] = or i32 [[A:%.*]], -16 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A_NOT]], -6 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 2 +; %and = and i32 %a, 15 %nand = xor i32 %and, -1 %cmp = icmp eq i32 %nand, 4294967285 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 7 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test3(i32 %a) #0 { -entry: -; CHECK-LABEL: @test3 -; CHECK: call void @llvm.assume -; CHECK: ret i32 5 - +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[V:%.*]] = or i32 [[A:%.*]], -16 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], -11 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 5 +; %v = or i32 %a, 4294967280 %cmp = icmp eq i32 %v, 4294967285 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 7 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test4(i32 %a) #0 { -entry: -; CHECK-LABEL: @test4 -; CHECK: call void @llvm.assume -; CHECK: ret i32 2 - +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[A_NOT:%.*]] = and i32 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A_NOT]], 10 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 2 +; %v = or i32 %a, 4294967280 %nv = xor i32 %v, -1 %cmp = icmp eq i32 %nv, 5 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 7 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test5(i32 %a) #0 { -entry: -; CHECK-LABEL: @test5 -; CHECK: call void @llvm.assume -; CHECK: ret i32 4 - +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 4 +; %v = xor i32 %a, 1 %cmp = icmp eq i32 %v, 5 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 7 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test6(i32 %a) #0 { -entry: -; CHECK-LABEL: @test6 -; CHECK: call void @llvm.assume -; CHECK: ret i32 5 - +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[V_MASK:%.*]] = and i32 
[[A:%.*]], 1073741823 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 5 +; %v = shl i32 %a, 2 %cmp = icmp eq i32 %v, 20 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 63 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test7(i32 %a) #0 { -entry: -; CHECK-LABEL: @test7 -; CHECK: call void @llvm.assume -; CHECK: ret i32 20 - +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[V_MASK:%.*]] = and i32 [[A:%.*]], -4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 20 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 20 +; %v = lshr i32 %a, 2 %cmp = icmp eq i32 %v, 5 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 252 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test8(i32 %a) #0 { -entry: -; CHECK-LABEL: @test8 -; CHECK: call void @llvm.assume -; CHECK: ret i32 20 - +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[V_MASK:%.*]] = and i32 [[A:%.*]], -4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 20 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 20 +; %v = lshr i32 %a, 2 %cmp = icmp eq i32 %v, 5 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 252 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test9(i32 %a) #0 { -entry: -; CHECK-LABEL: @test9 -; CHECK: call void @llvm.assume -; CHECK: ret i32 0 - +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 0 +; %cmp = icmp sgt i32 %a, 5 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 2147483648 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test10(i32 %a) #0 { -entry: -; CHECK-LABEL: @test10 -; CHECK: call void @llvm.assume -; CHECK: ret i32 -2147483648 - +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 -2147483648 +; %cmp = icmp sle i32 %a, -2 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 2147483648 ret i32 %and1 } -; Function Attrs: nounwind uwtable define i32 @test11(i32 %a) #0 { -entry: -; CHECK-LABEL: @test11 -; CHECK: call void @llvm.assume -; CHECK: ret i32 0 - +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 257 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 0 +; %cmp = icmp ule i32 %a, 256 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 3072 ret i32 %and1 } diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll index 09bea5895aaf..6b5f5a949530 100644 --- a/test/Transforms/InstCombine/fabs.ll +++ b/test/Transforms/InstCombine/fabs.ll @@ -13,7 +13,8 @@ define float @square_fabs_call_f32(float %x) { ; CHECK-LABEL: square_fabs_call_f32( ; CHECK-NEXT: %mul = fmul float %x, %x -; CHECK-NEXT: ret float %mul +; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul) +; CHECK-NEXT: ret float %fabsf } define double @square_fabs_call_f64(double %x) { @@ -23,7 +24,8 @@ define double @square_fabs_call_f64(double %x) { ; CHECK-LABEL: square_fabs_call_f64( ; CHECK-NEXT: %mul = fmul double %x, %x -; CHECK-NEXT: ret double %mul +; CHECK-NEXT: %fabs = tail call double @fabs(double %mul) +; CHECK-NEXT: ret double %fabs } define fp128 @square_fabs_call_f128(fp128 %x) { @@ -33,15 +35,18 @@ define fp128 @square_fabs_call_f128(fp128 %x) { ; CHECK-LABEL: square_fabs_call_f128( ; CHECK-NEXT: %mul = fmul 
fp128 %x, %x
-; CHECK-NEXT: ret fp128 %mul
+; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul)
+; CHECK-NEXT: ret fp128 %fabsl
 }
 
-; Make sure all intrinsic calls are eliminated when the input is known positive.
+; Make sure all intrinsic calls are eliminated when the input is known
+; positive.
 
 declare float @llvm.fabs.f32(float)
 declare double @llvm.fabs.f64(double)
 declare fp128 @llvm.fabs.f128(fp128)
 
+; The fabs cannot be eliminated because %x may be a NaN
 define float @square_fabs_intrinsic_f32(float %x) {
   %mul = fmul float %x, %x
   %fabsf = tail call float @llvm.fabs.f32(float %mul)
@@ -49,7 +54,8 @@ define float @square_fabs_intrinsic_f32(float %x) {
 
 ; CHECK-LABEL: square_fabs_intrinsic_f32(
 ; CHECK-NEXT: %mul = fmul float %x, %x
-; CHECK-NEXT: ret float %mul
+; CHECK-NEXT: %fabsf = tail call float @llvm.fabs.f32(float %mul)
+; CHECK-NEXT: ret float %fabsf
 }
 
 define double @square_fabs_intrinsic_f64(double %x) {
@@ -59,7 +65,8 @@ define double @square_fabs_intrinsic_f64(double %x) {
 
 ; CHECK-LABEL: square_fabs_intrinsic_f64(
 ; CHECK-NEXT: %mul = fmul double %x, %x
-; CHECK-NEXT: ret double %mul
+; CHECK-NEXT: %fabs = tail call double @llvm.fabs.f64(double %mul)
+; CHECK-NEXT: ret double %fabs
 }
 
 define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
@@ -69,7 +76,20 @@ define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
 
 ; CHECK-LABEL: square_fabs_intrinsic_f128(
 ; CHECK-NEXT: %mul = fmul fp128 %x, %x
-; CHECK-NEXT: ret fp128 %mul
+; CHECK-NEXT: %fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
+; CHECK-NEXT: ret fp128 %fabsl
+}
+
+; TODO: This should be able to eliminate the fabs
+define float @square_nnan_fabs_intrinsic_f32(float %x) {
+  %mul = fmul nnan float %x, %x
+  %fabsf = call float @llvm.fabs.f32(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
+; CHECK-NEXT: %mul = fmul nnan float %x, %x
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
+; CHECK-NEXT: ret float %fabsf
 }
 
 ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
@@ -82,7 +102,10 @@ define float @square_fabs_shrink_call1(float %x) { ret float %trunc ; CHECK-LABEL: square_fabs_shrink_call1( -; CHECK-NEXT: %trunc = fmul float %x, %x +; CHECK-NEXT: %ext = fpext float %x to double +; CHECK-NEXT: %sq = fmul double %ext, %ext +; CHECK-NEXT: call double @fabs(double %sq) +; CHECK-NEXT: %trunc = fptrunc double %fabs to float ; CHECK-NEXT: ret float %trunc } @@ -95,7 +118,8 @@ define float @square_fabs_shrink_call2(float %x) { ; CHECK-LABEL: square_fabs_shrink_call2( ; CHECK-NEXT: %sq = fmul float %x, %x -; CHECK-NEXT: ret float %sq +; CHECK-NEXT: %fabsf = call float @fabsf(float %sq) +; CHECK-NEXT: ret float %fabsf } ; CHECK-LABEL: @fabs_select_constant_negative_positive( diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index 6ccf6e9fa774..84f24ca0bf24 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ b/test/Transforms/InstCombine/fast-math.ll @@ -672,7 +672,8 @@ define double @sqrt_intrinsic_arg_4th(double %x) { ; CHECK-LABEL: sqrt_intrinsic_arg_4th( ; CHECK-NEXT: %mul = fmul fast double %x, %x -; CHECK-NEXT: ret double %mul +; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul) +; CHECK-NEXT: ret double %fabs } define double @sqrt_intrinsic_arg_5th(double %x) { @@ -684,8 +685,9 @@ define double @sqrt_intrinsic_arg_5th(double %x) { ; CHECK-LABEL: sqrt_intrinsic_arg_5th( ; CHECK-NEXT: %mul = fmul fast double %x, %x +; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul) ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x) -; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1 +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 ; CHECK-NEXT: ret double %1 } diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll index 1220dfdc77f0..4f18f3598540 100644 --- a/test/Transforms/InstCombine/urem-simplify-bug.ll +++ b/test/Transforms/InstCombine/urem-simplify-bug.ll @@ -1,32 +1,36 @@ -; RUN: opt < %s -instcombine -S | grep "= or i32 %x, -5" +; RUN: opt < %s -instcombine -S | FileCheck %s -@.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1] -@.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1] +@.str = internal constant [5 x i8] c"foo\0A\00" +@.str1 = internal constant [5 x i8] c"bar\0A\00" define i32 @main() nounwind { entry: - %x = call i32 @func_11( ) nounwind ; [#uses=1] - %tmp3 = or i32 %x, -5 ; [#uses=1] - %tmp5 = urem i32 251, %tmp3 ; [#uses=1] - %tmp6 = icmp ne i32 %tmp5, 0 ; [#uses=1] - %tmp67 = zext i1 %tmp6 to i32 ; [#uses=1] - %tmp9 = urem i32 %tmp67, 95 ; [#uses=1] - %tmp10 = and i32 %tmp9, 1 ; [#uses=1] - %tmp12 = icmp eq i32 %tmp10, 0 ; [#uses=1] - br i1 %tmp12, label %bb14, label %bb - -bb: ; preds = %entry - br label %bb15 - -bb14: ; preds = %entry - br label %bb15 - -bb15: ; preds = %bb14, %bb - %iftmp.0.0 = phi i8* [ getelementptr ([5 x i8], [5 x i8]* @.str1, i32 0, i32 0), %bb14 ], [ getelementptr ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), %bb ] ; [#uses=1] - %tmp17 = call i32 (i8*, ...) 
@printf( i8* %iftmp.0.0 ) nounwind ; [#uses=0] - ret i32 0 + %x = call i32 @func_11() nounwind + %tmp3 = or i32 %x, -5 + %tmp5 = urem i32 251, %tmp3 + %tmp6 = icmp ne i32 %tmp5, 0 + %tmp67 = zext i1 %tmp6 to i32 + %tmp9 = urem i32 %tmp67, 95 + %tmp10 = and i32 %tmp9, 1 + %tmp12 = icmp eq i32 %tmp10, 0 + br i1 %tmp12, label %bb14, label %bb + +bb: + br label %bb15 + +bb14: + br label %bb15 + +bb15: + %iftmp.0.0 = phi i8* [ getelementptr ([5 x i8], [5 x i8]* @.str1, i32 0, i32 0), %bb14 ], [ getelementptr ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), %bb ] + %tmp17 = call i32 (i8*, ...) @printf(i8* %iftmp.0.0) nounwind + ret i32 0 } +; CHECK-LABEL: define i32 @main( +; CHECK: call i32 @func_11() +; CHECK-NEXT: br i1 false, label %bb14, label %bb + declare i32 @func_11() -declare i32 @printf(i8*, ...) nounwind +declare i32 @printf(i8*, ...) nounwind diff --git a/test/Transforms/InstSimplify/div.ll b/test/Transforms/InstSimplify/div.ll new file mode 100644 index 000000000000..b8ce34aaa37e --- /dev/null +++ b/test/Transforms/InstSimplify/div.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -instsimplify -S | FileCheck %s + +declare i32 @external() + +define i32 @div1() { +; CHECK-LABEL: @div1( +; CHECK: [[CALL:%.*]] = call i32 @external(), !range !0 +; CHECK-NEXT: ret i32 0 +; + %call = call i32 @external(), !range !0 + %urem = udiv i32 %call, 3 + ret i32 %urem +} + +!0 = !{i32 0, i32 3} diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll index df3f659b782e..c73d34346ded 100644 --- a/test/Transforms/InstSimplify/rem.ll +++ b/test/Transforms/InstSimplify/rem.ll @@ -49,3 +49,17 @@ define i32 @rem3(i32 %x, i32 %n) { %mod1 = urem i32 %mod, %n ret i32 %mod1 } + +declare i32 @external() + +define i32 @rem4() { +; CHECK-LABEL: @rem4( +; CHECK: [[CALL:%.*]] = call i32 @external(), !range !0 +; CHECK-NEXT: ret i32 [[CALL]] +; + %call = call i32 @external(), !range !0 + %urem = urem i32 %call, 3 + ret i32 %urem +} + +!0 = !{i32 0, i32 3} diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index cb6981ede1e7..c61131b476b9 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -5,6 +5,8 @@ declare void @foo() +declare i32 @llvm.bitreverse.i32(i32) + ; This testcase tests for a problem where LICM hoists ; potentially trapping instructions when they are not guaranteed to execute. 
define i32 @test1(i1 %c) { @@ -122,3 +124,28 @@ then: ; preds = %tailrecurse ifend: ; preds = %tailrecurse ret { i32*, i32 } %d } + +; CHECK: define i32 @hoist_bitreverse(i32) +; CHECK: bitreverse +; CHECK: br label %header +define i32 @hoist_bitreverse(i32) { + br label %header + +header: + %sum = phi i32 [ 0, %1 ], [ %5, %latch ] + %2 = phi i32 [ 0, %1 ], [ %6, %latch ] + %3 = icmp slt i32 %2, 1024 + br i1 %3, label %body, label %return + +body: + %4 = call i32 @llvm.bitreverse.i32(i32 %0) + %5 = add i32 %sum, %4 + br label %latch + +latch: + %6 = add nsw i32 %2, 1 + br label %header + +return: + ret i32 %sum +} diff --git a/test/Transforms/LoopLoadElim/forward.ll b/test/Transforms/LoopLoadElim/forward.ll index ed0d162ab7e3..9a0e03a317c8 100644 --- a/test/Transforms/LoopLoadElim/forward.ll +++ b/test/Transforms/LoopLoadElim/forward.ll @@ -16,8 +16,8 @@ define void @f(i32* %A, i32* %B, i32* %C, i64 %N) { ; CHECK-NOT: %found.conflict{{.*}} = entry: -; for.body.ph: -; CHECK: %load_initial = load i32, i32* %A +; Make sure the hoisted load keeps the alignment +; CHECK: %load_initial = load i32, i32* %A, align 1 br label %for.body for.body: ; preds = %for.body, %entry @@ -34,7 +34,7 @@ for.body: ; preds = %for.body, %entry %a_p1 = add i32 %b, 2 store i32 %a_p1, i32* %Aidx_next, align 4 - %a = load i32, i32* %Aidx, align 4 + %a = load i32, i32* %Aidx, align 1 ; CHECK: %c = mul i32 %store_forwarded, 2 %c = mul i32 %a, 2 store i32 %c, i32* %Cidx, align 4 diff --git a/test/Transforms/LoopVectorize/iv_outside_user.ll b/test/Transforms/LoopVectorize/iv_outside_user.ll index d536d1023f41..8a44af96e7f4 100644 --- a/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -133,3 +133,48 @@ for.end: store i32 %phi2, i32* %p ret i32 %phi } + +; CHECK-LABEL: @PR30742 +; CHECK: min.iters.checked +; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2 +; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]] +; CHECK: middle.block +; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]] +; CHECK: %[[T15:.+]] = add i32 %tmp03, -7 +; CHECK: %[[T16:.+]] = shl i32 %[[N_MOD_VF]], 3 +; CHECK: %[[T17:.+]] = add i32 %[[T15]], %[[T16]] +; CHECK: %[[T18:.+]] = shl i32 {{.*}}, 3 +; CHECK: %ind.escape = sub i32 %[[T17]], %[[T18]] +; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph +define void @PR30742() { +BB0: + br label %BB1 + +BB1: + %tmp00 = load i32, i32* undef, align 16 + %tmp01 = sub i32 %tmp00, undef + %tmp02 = icmp slt i32 %tmp01, 1 + %tmp03 = select i1 %tmp02, i32 1, i32 %tmp01 + %tmp04 = add nsw i32 %tmp03, -7 + br label %BB2 + +BB2: + %tmp05 = phi i32 [ %tmp04, %BB1 ], [ %tmp06, %BB2 ] + %tmp06 = add i32 %tmp05, -8 + %tmp07 = icmp sgt i32 %tmp06, 0 + br i1 %tmp07, label %BB2, label %BB3 + +BB3: + %tmp08 = phi i32 [ %tmp05, %BB2 ] + %tmp09 = sub i32 %tmp00, undef + %tmp10 = icmp slt i32 %tmp09, 1 + %tmp11 = select i1 %tmp10, i32 1, i32 %tmp09 + %tmp12 = add nsw i32 %tmp11, -7 + br label %BB4 + +BB4: + %tmp13 = phi i32 [ %tmp12, %BB3 ], [ %tmp14, %BB4 ] + %tmp14 = add i32 %tmp13, -8 + %tmp15 = icmp sgt i32 %tmp14, 0 + br i1 %tmp15, label %BB4, label %BB1 +} diff --git a/test/Transforms/NewGVN/basic-cyclic-opt.ll b/test/Transforms/NewGVN/basic-cyclic-opt.ll new file mode 100644 index 000000000000..523ed2612e3c --- /dev/null +++ b/test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = 
"e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +;; Function Attrs: nounwind ssp uwtable +;; We should eliminate the sub, and one of the phi nodes +define void @vnum_test1(i32* %data) #0 { +; CHECK-LABEL: @vnum_test1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB17:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP18:%.*]], [[BB17]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB19:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]] +; CHECK-NEXT: store i32 2, i32* [[TMP10]], align 4 +; CHECK-NEXT: store i32 0, i32* [[DATA]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]] +; CHECK-NEXT: br label [[BB17]] +; CHECK: bb17: +; CHECK-NEXT: [[TMP18]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb19: +; CHECK-NEXT: ret void +; +bb: + %tmp = getelementptr inbounds i32, i32* %data, i64 3 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 4 + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb17, %bb + %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb17 ] + %i.0 = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ] + %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb17 ] + %tmp5 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp5, label %bb6, label %bb19 + +bb6: ; preds = %bb4 + %tmp7 = getelementptr inbounds i32, i32* %data, i64 2 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = sext i32 %tmp8 to i64 + %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9 + store i32 2, i32* %tmp10, align 4 + %tmp11 = sub nsw i32 %m.0, %n.0 + %tmp12 = getelementptr inbounds i32, i32* %data, i64 0 + store i32 %tmp11, i32* %tmp12, align 4 + %tmp13 = getelementptr inbounds i32, i32* %data, i64 1 + %tmp14 = load i32, i32* %tmp13, align 4 + %tmp15 = add nsw i32 %m.0, %tmp14 + %tmp16 = add nsw i32 %n.0, %tmp14 + br label %bb17 + +bb17: ; preds = %bb6 + %tmp18 = add nsw i32 %i.0, 1 + br label %bb4 + +bb19: ; preds = %bb4 + ret void +} + +;; Function Attrs: nounwind ssp uwtable +;; We should eliminate the sub, one of the phi nodes, prove the store of the sub +;; and the load of data are equivalent, that the load always produces constant 0, and +;; delete the load replacing it with constant 0. 
+define i32 @vnum_test2(i32* %data) #0 { +; CHECK-LABEL: @vnum_test2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB19:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP20:%.*]], [[BB19]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB21:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]] +; CHECK-NEXT: store i32 2, i32* [[TMP10]], align 4 +; CHECK-NEXT: store i32 0, i32* [[DATA]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]] +; CHECK-NEXT: br label [[BB19]] +; CHECK: bb19: +; CHECK-NEXT: [[TMP20]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb21: +; CHECK-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr inbounds i32, i32* %data, i64 3 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 4 + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb19, %bb + %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb19 ] + %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb19 ] + %i.0 = phi i32 [ 0, %bb ], [ %tmp20, %bb19 ] + %p.0 = phi i32 [ undef, %bb ], [ %tmp18, %bb19 ] + %tmp5 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp5, label %bb6, label %bb21 + +bb6: ; preds = %bb4 + %tmp7 = getelementptr inbounds i32, i32* %data, i64 2 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = sext i32 %tmp8 to i64 + %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9 + store i32 2, i32* %tmp10, align 4 + %tmp11 = sub nsw i32 %m.0, %n.0 + %tmp12 = getelementptr inbounds i32, i32* %data, i64 0 + store i32 %tmp11, i32* %tmp12, align 4 + %tmp13 = getelementptr inbounds i32, i32* %data, i64 1 + %tmp14 = load i32, i32* %tmp13, align 4 + %tmp15 = add nsw i32 %m.0, %tmp14 + %tmp16 = add nsw i32 %n.0, %tmp14 + %tmp17 = getelementptr inbounds i32, i32* %data, i64 0 + %tmp18 = load i32, i32* %tmp17, align 4 + br label %bb19 + +bb19: ; preds = %bb6 + %tmp20 = add nsw i32 %i.0, 1 + br label %bb4 + +bb21: ; preds = %bb4 + ret i32 %p.0 +} + + +; Function Attrs: nounwind ssp uwtable +;; Same as test 2, with a conditional store of m-n, so it has to also discover +;; that data ends up with the same value no matter what branch is taken. 
+define i32 @vnum_test3(i32* %data) #0 { +; CHECK-LABEL: @vnum_test3( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[N_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP19:%.*]], [[BB21:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP22:%.*]], [[BB21]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 5 +; CHECK-NEXT: store i32 0, i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[I_0]], 30 +; CHECK-NEXT: br i1 [[TMP10]], label [[BB11:%.*]], label [[BB14:%.*]] +; CHECK: bb11: +; CHECK-NEXT: store i32 0, i32* [[TMP9]], align 4 +; CHECK-NEXT: br label [[BB14]] +; CHECK: bb14: +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP19]] = add nsw i32 [[N_0]], [[TMP18]] +; CHECK-NEXT: br label [[BB21]] +; CHECK: bb21: +; CHECK-NEXT: [[TMP22]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb23: +; CHECK-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr inbounds i32, i32* %data, i64 3 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = getelementptr inbounds i32, i32* %data, i64 4 + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb21, %bb + %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp20, %bb21 ] + %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp19, %bb21 ] + %p.0 = phi i32 [ 0, %bb ], [ %tmp16, %bb21 ] + %i.0 = phi i32 [ 0, %bb ], [ %tmp22, %bb21 ] + %tmp5 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp5, label %bb6, label %bb23 + +bb6: ; preds = %bb4 + %tmp7 = getelementptr inbounds i32, i32* %data, i64 2 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = getelementptr inbounds i32, i32* %data, i64 5 + store i32 0, i32* %tmp9, align 4 + %tmp10 = icmp slt i32 %i.0, 30 + br i1 %tmp10, label %bb11, label %bb14 + +bb11: ; preds = %bb6 + %tmp12 = sub nsw i32 %m.0, %n.0 + %tmp13 = getelementptr inbounds i32, i32* %data, i64 5 + store i32 %tmp12, i32* %tmp13, align 4 + br label %bb14 + +bb14: ; preds = %bb11, %bb6 + %tmp15 = getelementptr inbounds i32, i32* %data, i64 5 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = getelementptr inbounds i32, i32* %data, i64 1 + %tmp18 = load i32, i32* %tmp17, align 4 + %tmp19 = add nsw i32 %m.0, %tmp18 + %tmp20 = add nsw i32 %n.0, %tmp18 + br label %bb21 + +bb21: ; preds = %bb14 + %tmp22 = add nsw i32 %i.0, 1 + br label %bb4 + +bb23: ; preds = %bb4 + ret i32 %p.0 +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0, !0, !0} + +!0 = !{!"Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)"} diff --git a/test/Transforms/NewGVN/cyclic-phi-handling.ll b/test/Transforms/NewGVN/cyclic-phi-handling.ll new file mode 100644 index 000000000000..283c78548995 --- /dev/null +++ 
b/test/Transforms/NewGVN/cyclic-phi-handling.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %arg, i32 %arg1, i32 (i32, i32)* %arg2) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label %bb3
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ %arg1, %bb ], [ [[TMP:%.*]]4, %bb7 ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ %arg, %bb ], [ [[TMP]], %bb7 ]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 %arg2(i32 [[TMP4]], i32 [[TMP]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label %bb7, label %bb8
+; CHECK: bb7:
+; CHECK-NEXT: br label %bb3
+; CHECK: bb8:
+; CHECK-NEXT: ret void
+;
+bb:
+  br label %bb3
+
+;; While non-standard, LLVM allows mutually dependent phi nodes.
+;; Ensure we do not loop forever trying to process them.
+bb3:                                              ; preds = %bb7, %bb
+  %tmp = phi i32 [ %arg1, %bb ], [ %tmp4, %bb7 ]
+  %tmp4 = phi i32 [ %arg, %bb ], [ %tmp, %bb7 ]
+  %tmp5 = call i32 %arg2(i32 %tmp4, i32 %tmp)
+  %tmp6 = icmp ne i32 %tmp5, 0
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb3
+  br label %bb3
+
+bb8:                                              ; preds = %bb3
+  ret void
+}
diff --git a/test/Transforms/NewGVN/invariant.group.ll b/test/Transforms/NewGVN/invariant.group.ll
index 2bddc99c8b85..80c6e05a8e24 100644
--- a/test/Transforms/NewGVN/invariant.group.ll
+++ b/test/Transforms/NewGVN/invariant.group.ll
@@ -345,11 +345,63 @@ _Z1gR1A.exit:                                     ; preds = %0, %5
   ret void
 }
 
+; Check that no optimizations are performed with global pointers.
+; FIXME: we could do these optimizations if we checked whether the dependency
+; comes from the same function.
+; CHECK-LABEL: define void @testGlobal() {
+define void @testGlobal() {
+; CHECK: %a = load i8, i8* @unknownPtr, !invariant.group !0
+  %a = load i8, i8* @unknownPtr, !invariant.group !0
+  call void @foo2(i8* @unknownPtr, i8 %a)
+; CHECK: %1 = load i8, i8* @unknownPtr, !invariant.group !0
+  %1 = load i8, i8* @unknownPtr, !invariant.group !0
+  call void @bar(i8 %1)
+
+  %b0 = bitcast i8* @unknownPtr to i1*
+  call void @fooBit(i1* %b0, i1 1)
+; Using a regex here because bitcasts get canonicalized.
+; CHECK: %2 = load i1, i1* {{.*}}, !invariant.group !0
+  %2 = load i1, i1* %b0, !invariant.group !0
+  call void @fooBit(i1* %b0, i1 %2)
+; CHECK: %3 = load i1, i1* {{.*}}, !invariant.group !0
+  %3 = load i1, i1* %b0, !invariant.group !0
+  call void @fooBit(i1* %b0, i1 %3)
+  ret void
+}
+; And the same case where the pointer is not global.
+; CHECK-LABEL: define void @testNotGlobal() {
+define void @testNotGlobal() {
+  %a = alloca i8
+  call void @foo(i8* %a)
+; CHECK: %b = load i8, i8* %a, !invariant.group !0
+  %b = load i8, i8* %a, !invariant.group !0
+  call void @foo2(i8* %a, i8 %b)
+
+  %1 = load i8, i8* %a, !invariant.group !0
+; CHECK: call void @bar(i8 %b)
+  call void @bar(i8 %1)
+
+  %b0 = bitcast i8* %a to i1*
+  call void @fooBit(i1* %b0, i1 1)
+; CHECK: %trunc = trunc i8 %b to i1
+  %2 = load i1, i1* %b0, !invariant.group !0
+; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %trunc)
+  call void @fooBit(i1* %b0, i1 %2)
+  %3 = load i1, i1* %b0, !invariant.group !0
+; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %trunc)
+  call void @fooBit(i1* %b0, i1 %3)
+  ret void
+}
+
+
 declare void @foo(i8*)
+declare void @foo2(i8*, i8)
 declare void @bar(i8)
 declare i8* @getPointer(i8*)
 declare void @_ZN1A3fooEv(%struct.A*)
 declare void @_ZN1AC1Ev(%struct.A*)
+declare void @fooBit(i1*, i1)
+
 declare i8* @llvm.invariant.group.barrier(i8*)
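+
+; A minimal sketch of the contract the checks above rely on (the names %p and
+; @opaque are hypothetical, not part of this test): two loads of the same
+; pointer that both carry the same !invariant.group metadata may be assumed to
+; yield the same value, so GVN can forward the first value to the second load
+; even across an opaque call:
+;   %v1 = load i8, i8* %p, !invariant.group !0
+;   call void @opaque(i8* %p)
+;   %v2 = load i8, i8* %p, !invariant.group !0 ; may be replaced by %v1
+; @testNotGlobal above gets exactly this treatment; @testGlobal does not,
+; because the pointer is a global (see the FIXME).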

 ; Function Attrs: nounwind
diff --git a/test/Transforms/NewGVN/memory-handling.ll b/test/Transforms/NewGVN/memory-handling.ll
new file mode 100644
index 000000000000..a0c4a998b8b6
--- /dev/null
+++ b/test/Transforms/NewGVN/memory-handling.ll
@@ -0,0 +1,195 @@
+;; This test depends on propagating a lot of memory information around while,
+;; in the end, not miscompiling a single add.
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.Letter = type { i32, i32, i32, i32 }
+
+@alPhrase = external local_unnamed_addr global [26 x %struct.Letter], align 16
+@aqMainMask = external local_unnamed_addr global [2 x i64], align 16
+@aqMainSign = external local_unnamed_addr global [2 x i64], align 16
+@cchPhraseLength = external local_unnamed_addr global i32, align 4
+@auGlobalFrequency = external local_unnamed_addr global [26 x i32], align 16
+@.str.7 = external hidden unnamed_addr constant [28 x i8], align 1
+
+; Function Attrs: nounwind uwtable
+declare void @Fatal(i8*, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind readnone
+declare i16** @__ctype_b_loc() local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define void @BuildMask(i8* nocapture readonly) local_unnamed_addr #0 {
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([26 x %struct.Letter]* @alPhrase to i8*), i8 0, i64 416, i32 16, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([2 x i64]* @aqMainMask to i8*), i8 0, i64 16, i32 16, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([2 x i64]* @aqMainSign to i8*), i8 0, i64 16, i32 16, i1 false)
+  br label %.sink.split
+
+.sink.split:                                      ; preds = %14, %1
+  %.0 = phi i8* [ %0, %1 ], [ %.lcssa67, %14 ]
+  %.sink = phi i32 [ 0, %1 ], [ %23, %14 ]
+  store i32 %.sink, i32* @cchPhraseLength, align 4, !tbaa !1
+  br label %2
+
; b;
-  // expected-note@+1 {{in instantiation of default member initializer}}
 template <typename T> struct C { T a = { 0 }; }; // expected-error{{explicit}}
-  C c; // expected-note{{here}}
+  C c; // expected-note {{in instantiation of default member initializer}}
 }
 
 namespace PR16903 {
diff --git a/test/SemaTemplate/temp_arg_nontype.cpp b/test/SemaTemplate/temp_arg_nontype.cpp
index 93f11b5657d0..27a0a03f84f4 100644
--- a/test/SemaTemplate/temp_arg_nontype.cpp
+++ b/test/SemaTemplate/temp_arg_nontype.cpp
@@ -173,12 +173,16 @@ namespace pr6249 {
 }
 
 namespace PR6723 {
-  template <char C> void f(int (&a)[C]); // expected-note {{candidate template ignored}} \
-  // expected-note{{substitution failure [with C = '\x00']}}
+  template <char C> void f(int (&a)[C]); // expected-note 3{{candidate template ignored: substitution failure [with C = '\x00']}}
+  // expected-note@-1 {{not viable: no known conversion from 'int [512]' to 'int (&)[0]'}}
   void g() {
     int arr512[512];
     f(arr512); // expected-error{{no matching function for call}}
     f<512>(arr512); // expected-error{{no matching function for call}}
+
+    int arr0[0];
+    f(arr0); // expected-error{{no matching function for call}}
+    f<0>(arr0); // expected-error{{no matching function for call}}
   }
 }
diff --git a/tools/c-index-test/core_main.cpp b/tools/c-index-test/core_main.cpp
index 8976d9134916..0ab24fb6ccb9 100644
--- a/tools/c-index-test/core_main.cpp
+++ b/tools/c-index-test/core_main.cpp
@@ -166,6 +166,8 @@ static bool printSourceSymbols(ArrayRef<const char *> Args) {
 
 static void printSymbolInfo(SymbolInfo SymInfo, raw_ostream &OS) {
   OS << getSymbolKindString(SymInfo.Kind);
+  if (SymInfo.SubKind != SymbolSubKind::None)
+    OS << '/' << 
getSymbolSubKindString(SymInfo.SubKind); if (SymInfo.Properties) { OS << '('; printSymbolProperties(SymInfo.Properties, OS); diff --git a/tools/driver/CMakeLists.txt b/tools/driver/CMakeLists.txt index 49bde947f4c6..f6e26fa11f41 100644 --- a/tools/driver/CMakeLists.txt +++ b/tools/driver/CMakeLists.txt @@ -72,7 +72,7 @@ endforeach() # Configure plist creation for OS X. set (TOOL_INFO_PLIST "Info.plist" CACHE STRING "Plist name") -if (APPLE) +if (APPLE) if (CLANG_VENDOR) set(TOOL_INFO_NAME "${CLANG_VENDOR} clang") else() @@ -82,20 +82,19 @@ if (APPLE) set(TOOL_INFO_UTI "${CLANG_VENDOR_UTI}") set(TOOL_INFO_VERSION "${CLANG_VERSION}") set(TOOL_INFO_BUILD_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - + set(TOOL_INFO_PLIST_OUT "${CMAKE_CURRENT_BINARY_DIR}/${TOOL_INFO_PLIST}") target_link_libraries(clang "-Wl,-sectcreate,__TEXT,__info_plist,${TOOL_INFO_PLIST_OUT}") configure_file("${TOOL_INFO_PLIST}.in" "${TOOL_INFO_PLIST_OUT}" @ONLY) - + set(TOOL_INFO_UTI) set(TOOL_INFO_NAME) set(TOOL_INFO_VERSION) set(TOOL_INFO_BUILD_VERSION) endif() -# the linker -order_file flag is only supported by ld64 -if(LD64_EXECUTABLE AND CLANG_ORDER_FILE) +if(CLANG_ORDER_FILE AND (LD64_EXECUTABLE OR GOLD_EXECUTABLE)) include(CMakePushCheckState) function(check_linker_flag flag out_var) @@ -105,9 +104,14 @@ if(LD64_EXECUTABLE AND CLANG_ORDER_FILE) cmake_pop_check_state() endfunction() + if (LD64_EXECUTABLE) + set(LINKER_ORDER_FILE_OPTION "-Wl,-order_file,${CLANG_ORDER_FILE}") + elseif (GOLD_EXECUTABLE) + set(LINKER_ORDER_FILE_OPTION "-Wl,--section-ordering-file,${CLANG_ORDER_FILE}") + endif() + # This is a test to ensure the actual order file works with the linker. - check_linker_flag("-Wl,-order_file,${CLANG_ORDER_FILE}" - LINKER_ORDER_FILE_WORKS) + check_linker_flag(${LINKER_ORDER_FILE_OPTION} LINKER_ORDER_FILE_WORKS) # Passing an empty order file disables some linker layout optimizations. 
# To work around this and enable workflows for re-linking when the order file @@ -117,7 +121,7 @@ if(LD64_EXECUTABLE AND CLANG_ORDER_FILE) if("${ORDER_FILE}" STREQUAL "\n") set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${CLANG_ORDER_FILE}) elseif(LINKER_ORDER_FILE_WORKS) - target_link_libraries(clang "-Wl,-order_file,${CLANG_ORDER_FILE}") + target_link_libraries(clang ${LINKER_ORDER_FILE_OPTION}) set_target_properties(clang PROPERTIES LINK_DEPENDS ${CLANG_ORDER_FILE}) endif() endif() diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index 6f9df680eef5..629e85803d64 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -5780,6 +5780,10 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyGoogleFormat("MACRO Constructor(const int& i) : a(a), b(b) {}"); verifyFormat("void f() { f(a, c * d); }"); verifyFormat("void f() { f(new a(), c * d); }"); + verifyFormat("void f(const MyOverride &override);"); + verifyFormat("void f(const MyFinal &final);"); + verifyIndependentOfContext("bool a = f() && override.f();"); + verifyIndependentOfContext("bool a = f() && final.f();"); verifyIndependentOfContext("InvalidRegions[*R] = 0;"); diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp index 59f4a4f6dcfe..230717fe47cc 100644 --- a/unittests/Format/FormatTestJS.cpp +++ b/unittests/Format/FormatTestJS.cpp @@ -858,6 +858,26 @@ TEST_F(FormatTestJS, AutomaticSemicolonInsertionHeuristic) { "return 1", "a = null\n" " return 1"); + verifyFormat( + "x = {\n" + " a: 1\n" + "}\n" + "class Y {}", + " x = {a : 1}\n" + " class Y { }"); +} + +TEST_F(FormatTestJS, ImportExportASI) { + verifyFormat( + "import {x} from 'y'\n" + "export function z() {}", + "import {x} from 'y'\n" + " export function z() {}"); + verifyFormat( + "export {x}\n" + "class Y {}", + " export {x}\n" + " class Y {\n}"); } TEST_F(FormatTestJS, ClosureStyleCasts) { diff --git a/www/cxx_dr_status.html b/www/cxx_dr_status.html index ee8ce025eb47..e7d2e5f87c9c 100644 --- a/www/cxx_dr_status.html +++ b/www/cxx_dr_status.html @@ -28,7 +28,7 @@

 <h1>C++ Defect Report Support in Clang</h1>
 
-<p>Last updated: $Date: 2017-01-02 12:15:42 +0100 (Mon, 02 Jan 2017) $</p>
+<p>Last updated: $Date: 2017-01-09 09:01:21 +0100 (Mon, 09 Jan 2017) $</p>
 
 <h2 id="cxxdr">C++ defect report implementation status</h2>
@@ -8143,7 +8143,7 @@ and POD class
     <td>1388</td>
     <td>CD3</td>
     <td>Missing non-deduced context following a function parameter pack</td>
-    <td align="center">Unknown</td>
+    <td align="center">SVN</td>
   </tr>
   <tr id="1389">
@@ -8161,7 +8161,7 @@ and POD class
     <td>1391</td>
     <td>DRWP</td>
     <td>Conversions to parameter types with non-deduced template arguments</td>
-    <td align="center">Unknown</td>
+    <td align="center">Partial</td>
   </tr>
   <tr id="1392">
@@ -8209,7 +8209,7 @@ and POD class
     <td>1399</td>
     <td>CD3</td>
     <td>Deduction with multiple function parameter packs</td>
-    <td align="center">Unknown</td>
+    <td align="center">Duplicate of 1388</td>
   </tr>
   <tr id="1400">
-- 
cgit v1.2.3
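A minimal illustration of the rule behind the 1391 row above ("Conversions
to parameter types with non-deduced template arguments", now Partial); the
names Id, f, and the call are hypothetical, not taken from this patch:

  template <class T> struct Id { typedef T type; };

  // T appears in the second parameter only inside a non-deduced context
  // (the nested name Id<T>::type), so T is deduced from the first argument
  // alone.
  template <class T> void f(T, typename Id<T>::type) {}

  int main() {
    f(1, 2.5); // T = int; DR1391 permits the implicit double -> int
               // conversion for the second argument.
  }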