author      Dimitry Andric <dim@FreeBSD.org>    2024-01-03 18:04:11 +0000
committer   Dimitry Andric <dim@FreeBSD.org>    2024-04-19 21:24:24 +0000
commit      0c85e2760f6b5016c16d29f8c2f63f3ba2cf5298 (patch)
tree        d6c9033fa7ca2f632ddc81d371ef3faf921652db /contrib/llvm-project/llvm
parent      92d4d6f1f60e5d9cb2c7e0dd5d632987e54741e8 (diff)
download    src-0c85e2760f6b5016c16d29f8c2f63f3ba2cf5298.tar.gz
            src-0c85e2760f6b5016c16d29f8c2f63f3ba2cf5298.zip
Merge llvm-project main llvmorg-18-init-16003-gfc5f51cf5af4
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project main llvmorg-18-init-16003-gfc5f51cf5af4.

PR:             276104
MFC after:      1 month

(cherry picked from commit 647cbc5de815c5651677bf8582797f716ec7b48d)
Diffstat (limited to 'contrib/llvm-project/llvm')
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h | 6
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h | 65
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h | 7
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h | 22
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 7
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 18
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 7
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 10
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h | 44
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h | 9
-rw-r--r--  contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp | 32
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp | 163
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 353
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 42
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Support/Windows/Path.inc | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 39
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 57
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 76
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 22
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 28
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp | 69
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp | 243
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td | 70
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp | 20
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td | 33
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 134
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 45
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 31
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 230
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 152
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 83
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td | 34
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td | 72
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td | 59
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td | 55
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 76
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 1
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 43
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86.td | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td | 68
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td | 1306
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td | 94
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td | 426
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp | 56
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/Host.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 46
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 196
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 71
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 58
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 64
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp | 60
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 63
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 146
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h | 30
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 65
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h | 1
-rw-r--r--  contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp | 3
-rw-r--r--  contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 18
-rw-r--r--  contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp | 37
-rw-r--r--  contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h | 4
182 files changed, 4011 insertions, 2494 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h b/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h
index b7d0a1228ebf..d397b937d78c 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -33,6 +33,12 @@
/// the propagation of the impact of divergent control flow on the divergence of
/// values (sync dependencies).
///
+/// NOTE: In general, no interface exists for a transform to update
+/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
+/// transitive dependence, but it also does not provide an interface for
+/// updating itself. Given that, transforms should not preserve uniformity in
+/// their getAnalysisUsage() callback.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
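The NOTE above has a practical consequence for pass authors: a transform that requires uniformity information should not also claim to preserve it. The following is a minimal sketch of that convention, assuming the legacy pass manager and the MachineUniformityAnalysisPass wrapper; the pass itself is hypothetical and not part of this commit.

// Sketch (not part of this commit): require uniformity info, do not preserve it.
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
using namespace llvm;

namespace {
struct ExampleDivergencePass : MachineFunctionPass { // hypothetical pass
  static char ID;
  ExampleDivergencePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineUniformityAnalysisPass>();
    // Intentionally no AU.addPreserved<MachineUniformityAnalysisPass>():
    // per the note, neither the uniformity info nor the underlying cycle
    // analysis can be updated after this pass rewrites control flow.
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // ... transform that may change control flow / divergence ...
    return true;
  }
};
} // end anonymous namespace
char ExampleDivergencePass::ID = 0;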
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h b/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h
index 02fa28fc856d..a5b9eec50c82 100644
--- a/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h
+++ b/contrib/llvm-project/llvm/include/llvm/ADT/SmallString.h
@@ -89,30 +89,26 @@ public:
/// Check for string equality. This is more efficient than compare() when
/// the relative ordering of inequal strings isn't needed.
- bool equals(StringRef RHS) const {
- return str().equals(RHS);
- }
+ [[nodiscard]] bool equals(StringRef RHS) const { return str().equals(RHS); }
/// Check for string equality, ignoring case.
- bool equals_insensitive(StringRef RHS) const {
+ [[nodiscard]] bool equals_insensitive(StringRef RHS) const {
return str().equals_insensitive(RHS);
}
/// compare - Compare two strings; the result is negative, zero, or positive
/// if this string is lexicographically less than, equal to, or greater than
/// the \p RHS.
- int compare(StringRef RHS) const {
- return str().compare(RHS);
- }
+ [[nodiscard]] int compare(StringRef RHS) const { return str().compare(RHS); }
/// compare_insensitive - Compare two strings, ignoring case.
- int compare_insensitive(StringRef RHS) const {
+ [[nodiscard]] int compare_insensitive(StringRef RHS) const {
return str().compare_insensitive(RHS);
}
/// compare_numeric - Compare two strings, treating sequences of digits as
/// numbers.
- int compare_numeric(StringRef RHS) const {
+ [[nodiscard]] int compare_numeric(StringRef RHS) const {
return str().compare_numeric(RHS);
}
@@ -121,10 +117,14 @@ public:
/// @{
/// starts_with - Check if this string starts with the given \p Prefix.
- bool starts_with(StringRef Prefix) const { return str().starts_with(Prefix); }
+ [[nodiscard]] bool starts_with(StringRef Prefix) const {
+ return str().starts_with(Prefix);
+ }
/// ends_with - Check if this string ends with the given \p Suffix.
- bool ends_with(StringRef Suffix) const { return str().ends_with(Suffix); }
+ [[nodiscard]] bool ends_with(StringRef Suffix) const {
+ return str().ends_with(Suffix);
+ }
/// @}
/// @name String Searching
@@ -134,7 +134,7 @@ public:
///
/// \return - The index of the first occurrence of \p C, or npos if not
/// found.
- size_t find(char C, size_t From = 0) const {
+ [[nodiscard]] size_t find(char C, size_t From = 0) const {
return str().find(C, From);
}
@@ -142,7 +142,7 @@ public:
///
/// \returns The index of the first occurrence of \p Str, or npos if not
/// found.
- size_t find(StringRef Str, size_t From = 0) const {
+ [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const {
return str().find(Str, From);
}
@@ -150,7 +150,7 @@ public:
///
/// \returns The index of the last occurrence of \p C, or npos if not
/// found.
- size_t rfind(char C, size_t From = StringRef::npos) const {
+ [[nodiscard]] size_t rfind(char C, size_t From = StringRef::npos) const {
return str().rfind(C, From);
}
@@ -158,13 +158,11 @@ public:
///
/// \returns The index of the last occurrence of \p Str, or npos if not
/// found.
- size_t rfind(StringRef Str) const {
- return str().rfind(Str);
- }
+ [[nodiscard]] size_t rfind(StringRef Str) const { return str().rfind(Str); }
/// Find the first character in the string that is \p C, or npos if not
/// found. Same as find.
- size_t find_first_of(char C, size_t From = 0) const {
+ [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const {
return str().find_first_of(C, From);
}
@@ -172,13 +170,13 @@ public:
/// not found.
///
/// Complexity: O(size() + Chars.size())
- size_t find_first_of(StringRef Chars, size_t From = 0) const {
+ [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const {
return str().find_first_of(Chars, From);
}
/// Find the first character in the string that is not \p C or npos if not
/// found.
- size_t find_first_not_of(char C, size_t From = 0) const {
+ [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const {
return str().find_first_not_of(C, From);
}
@@ -186,13 +184,15 @@ public:
/// \p Chars, or npos if not found.
///
/// Complexity: O(size() + Chars.size())
- size_t find_first_not_of(StringRef Chars, size_t From = 0) const {
+ [[nodiscard]] size_t find_first_not_of(StringRef Chars,
+ size_t From = 0) const {
return str().find_first_not_of(Chars, From);
}
/// Find the last character in the string that is \p C, or npos if not
/// found.
- size_t find_last_of(char C, size_t From = StringRef::npos) const {
+ [[nodiscard]] size_t find_last_of(char C,
+ size_t From = StringRef::npos) const {
return str().find_last_of(C, From);
}
@@ -200,8 +200,8 @@ public:
/// found.
///
/// Complexity: O(size() + Chars.size())
- size_t find_last_of(
- StringRef Chars, size_t From = StringRef::npos) const {
+ [[nodiscard]] size_t find_last_of(StringRef Chars,
+ size_t From = StringRef::npos) const {
return str().find_last_of(Chars, From);
}
@@ -210,15 +210,11 @@ public:
/// @{
/// Return the number of occurrences of \p C in the string.
- size_t count(char C) const {
- return str().count(C);
- }
+ [[nodiscard]] size_t count(char C) const { return str().count(C); }
/// Return the number of non-overlapped occurrences of \p Str in the
/// string.
- size_t count(StringRef Str) const {
- return str().count(Str);
- }
+ [[nodiscard]] size_t count(StringRef Str) const { return str().count(Str); }
/// @}
/// @name Substring Operations
@@ -233,7 +229,8 @@ public:
/// \param N The number of characters to included in the substring. If \p N
/// exceeds the number of characters remaining in the string, the string
/// suffix (starting with \p Start) will be returned.
- StringRef substr(size_t Start, size_t N = StringRef::npos) const {
+ [[nodiscard]] StringRef substr(size_t Start,
+ size_t N = StringRef::npos) const {
return str().substr(Start, N);
}
@@ -247,14 +244,16 @@ public:
/// substring. If this is npos, or less than \p Start, or exceeds the
/// number of characters remaining in the string, the string suffix
/// (starting with \p Start) will be returned.
- StringRef slice(size_t Start, size_t End) const {
+ [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
return str().slice(Start, End);
}
// Extra methods.
/// Explicit conversion to StringRef.
- StringRef str() const { return StringRef(this->data(), this->size()); }
+ [[nodiscard]] StringRef str() const {
+ return StringRef(this->data(), this->size());
+ }
// TODO: Make this const, if it's safe...
const char* c_str() {
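For context, the effect of the [[nodiscard]] annotations added throughout this header is that silently discarding the result of these pure query methods is now diagnosed. A small illustration with a hypothetical caller (not part of the diff):

// Sketch (not part of this commit): discarded results of [[nodiscard]]
// queries now trigger an unused-result warning.
#include "llvm/ADT/SmallString.h"

void inspect(const llvm::SmallString<32> &Name) {
  if (Name.starts_with("llvm.")) // fine: the result is consumed
    return;
  Name.count('.');               // now warns: ignoring return value
}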
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h
index 5d3bc64bf8b4..7b02b618f7cb 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -54,9 +54,6 @@ class ConstraintSystem {
/// constraint system.
DenseMap<Value *, unsigned> Value2Index;
- /// Current greatest common divisor for all coefficients in the system.
- uint32_t GCD = 1;
-
// Eliminate constraints from the system using Fourier–Motzkin elimination.
bool eliminateUsingFM();
@@ -88,10 +85,6 @@ public:
for (const auto &[Idx, C] : enumerate(R)) {
if (C == 0)
continue;
- auto A = std::abs(C);
- GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD})
- .getZExtValue();
-
NewRow.emplace_back(C, Idx);
}
if (Constraints.empty())
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 735be3680aea..048912beaba5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1243,6 +1243,18 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr) const;
+ /// Returns the cost estimation for alternating opcode pattern that can be
+ /// lowered to a single instruction on the target. In X86 this is for the
+ /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
+ /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
+ /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
+ /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
+ /// \p VecTy is the vector type of the instruction to be generated.
+ InstructionCost getAltInstrCost(
+ VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and
@@ -1944,6 +1956,10 @@ public:
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
+ virtual InstructionCost getAltInstrCost(
+ VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
@@ -2555,6 +2571,12 @@ public:
return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Args, CxtI);
}
+ InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+ unsigned Opcode1,
+ const SmallBitVector &OpcodeMask,
+ TTI::TargetCostKind CostKind) const override {
+ return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+ }
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
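To make the new hook concrete, here is a hedged sketch of how a vectorizer-style client might query it for the addsub pattern described in the comment above; the helper and its setup are illustrative only and not code from this commit.

// Sketch (not part of this commit): cost of a 4-lane alternating FSub/FAdd,
// the pattern X86 can lower to a single ADDSUBPS, i.e. IR of the form
//   %s = fsub <4 x float> %a, %b
//   %d = fadd <4 x float> %a, %b
//   %r = shufflevector <4 x float> %s, <4 x float> %d,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

InstructionCost addSubCost(const TargetTransformInfo &TTI, VectorType *VecTy) {
  SmallBitVector OpcodeMask(4, false); // one bit per lane, 0 selects Opcode0
  OpcodeMask.set(1);                   // odd lanes use Opcode1 (FAdd)
  OpcodeMask.set(3);                   // even lanes stay 0 and use FSub
  return TTI.getAltInstrCost(VecTy, Instruction::FSub, Instruction::FAdd,
                             OpcodeMask); // CostKind defaults to throughput
}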
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 1d8f523e9792..7ad3ce512a35 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -554,6 +554,13 @@ public:
return 1;
}
+ InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+ unsigned Opcode1,
+ const SmallBitVector &OpcodeMask,
+ TTI::TargetCostKind CostKind) const {
+ return InstructionCost::getInvalid();
+ }
+
InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e7debc652a0a..dcc1a4580b14 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -769,9 +769,6 @@ public:
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI,
BuildFnTy &MatchInfo);
- /// Fold boolean selects to logical operations.
- bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo);
-
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info);
/// Transform G_ADD(x, G_SUB(y, x)) to y.
@@ -814,6 +811,9 @@ public:
// Given a binop \p MI, commute operands 1 and 2.
void applyCommuteBinOpOperands(MachineInstr &MI);
+ /// Combine selects.
+ bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -904,6 +904,18 @@ private:
/// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0
bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal,
Register FalseVal, BuildFnTy &MatchInfo);
+
+ /// Try to fold selects to logical operations.
+ bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo);
+
+ bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo);
+
+ bool isOneOrOneSplat(Register Src, bool AllowUndefs);
+ bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
+ bool isConstantSplatVector(Register Src, int64_t SplatValue,
+ bool AllowUndefs);
+
+ std::optional<APInt> getConstantOrConstantSplatVector(Register Src);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index abbef03d02cb..669104307fa0 100644
--- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2562,6 +2562,13 @@ public:
AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
+ InsertPointTy createAtomicCompare(const LocationDescription &Loc,
+ AtomicOpValue &X, AtomicOpValue &V,
+ AtomicOpValue &R, Value *E, Value *D,
+ AtomicOrdering AO,
+ omp::OMPAtomicCompareOp Op,
+ bool IsXBinopExpr, bool IsPostfixUpdate,
+ bool IsFailOnly, AtomicOrdering Failure);
/// Create the control flow structure of a canonical OpenMP loop.
///
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index cb48f54b13a6..531b11123545 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+def global_ptr_ty : LLVMQualPointerType<1>;
+
class AMDGPUReadPreloadRegisterIntrinsic
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
@@ -2353,10 +2355,10 @@ def int_amdgcn_s_get_waveid_in_workgroup :
Intrinsic<[llvm_i32_ty], [],
[IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-class AMDGPUAtomicRtn<LLVMType vt> : Intrinsic <
+class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic <
[vt],
- [llvm_anyptr_ty, // vaddr
- vt], // vdata(VGPR)
+ [pt, // vaddr
+ vt], // vdata(VGPR)
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "",
[SDNPMemOperand]>;
@@ -2486,6 +2488,8 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var"
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
+def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>;
+
def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 2de2cf4185d8..84cac3ef700e 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -127,12 +127,20 @@ enum : int32_t {
#undef COMPUTE_PGM_RSRC1
// Compute program resource register 2. Must match hardware definition.
+// GFX6+.
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX11].
+#define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH)
+// GFX12+.
+#define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
- COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
+ COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1),
+ COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
@@ -166,23 +174,37 @@ enum : int32_t {
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
-// [GFX10].
-#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
- AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
// GFX10+.
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
+// [GFX10].
+#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH)
+// [GFX10-GFX11].
+#define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH)
// GFX11+.
#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
+// [GFX11].
+#define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH)
+// GFX12+.
+#define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH)
enum : int32_t {
- COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
- COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
- COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
- COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
- COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
- COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
- COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
+ COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4),
+ COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4),
+ COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8),
+ COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6),
+ COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1),
+ COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1),
+ COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1),
+ COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1),
+ COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17),
+ COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1),
COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index b0683ac2e32c..3aceb247a26c 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -70,7 +70,8 @@ enum attributeBits {
ATTR_EVEXKZ = 0x1 << 11,
ATTR_EVEXB = 0x1 << 12,
ATTR_REX2 = 0x1 << 13,
- ATTR_max = 0x1 << 14,
+ ATTR_EVEXNF = 0x1 << 14,
+ ATTR_max = 0x1 << 15,
};
// Combinations of the above attributes that are relevant to instruction
@@ -137,12 +138,15 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \
ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") \
ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \
+ ENUM_ENTRY(IC_EVEX_NF, 2, "requires EVEX and NF prefix") \
ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \
ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \
ENUM_ENTRY(IC_EVEX_OPSIZE, 2, "requires EVEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_NF, 3, "requires EVEX, NF and the OpSize prefix") \
ENUM_ENTRY(IC_EVEX_OPSIZE_ADSIZE, 3, \
"requires EVEX, OPSIZE and the ADSIZE prefix") \
ENUM_ENTRY(IC_EVEX_W, 3, "requires EVEX and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_NF, 4, "requires EVEX, W and NF prefix") \
ENUM_ENTRY(IC_EVEX_W_XS, 4, "requires EVEX, W, and XS prefix") \
ENUM_ENTRY(IC_EVEX_W_XD, 4, "requires EVEX, W, and XD prefix") \
ENUM_ENTRY(IC_EVEX_W_OPSIZE, 4, "requires EVEX, W, and OpSize") \
@@ -187,10 +191,13 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_XD_K, 4, "requires EVEX_K, L2, W and XD prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K, 4, "requires EVEX_K, L2, W and OpSize") \
ENUM_ENTRY(IC_EVEX_B, 1, "requires an EVEX_B prefix") \
+ ENUM_ENTRY(IC_EVEX_B_NF, 2, "requires EVEX_NF and EVEX_B prefix") \
ENUM_ENTRY(IC_EVEX_XS_B, 2, "requires EVEX_B and the XS prefix") \
ENUM_ENTRY(IC_EVEX_XD_B, 2, "requires EVEX_B and the XD prefix") \
ENUM_ENTRY(IC_EVEX_OPSIZE_B, 2, "requires EVEX_B and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_B_NF, 3, "requires EVEX_B, NF and Opsize prefix") \
ENUM_ENTRY(IC_EVEX_W_B, 3, "requires EVEX_B and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_B_NF, 4, "requires EVEX_NF, EVEX_B and the W prefix") \
ENUM_ENTRY(IC_EVEX_W_XS_B, 4, "requires EVEX_B, W, and XS prefix") \
ENUM_ENTRY(IC_EVEX_W_XD_B, 4, "requires EVEX_B, W, and XD prefix") \
ENUM_ENTRY(IC_EVEX_W_OPSIZE_B, 4, "requires EVEX_B, W, and OpSize") \
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
index 77db371adaf7..6bda80681432 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -437,13 +437,6 @@ def select_constant_cmp: GICombineRule<
(apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
>;
-def select_to_logical : GICombineRule<
- (defs root:$root, build_fn_matchinfo:$matchinfo),
- (match (wip_match_opcode G_SELECT):$root,
- [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]),
- (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
->;
-
// Fold (C op x) -> (x op C)
// TODO: handle more isCommutable opcodes
// TODO: handle compares (currently not marked as isCommutable)
@@ -1242,6 +1235,12 @@ def select_to_minmax: GICombineRule<
[{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def match_selects : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SELECT):$root,
+ [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1282,7 +1281,7 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
def phi_combines : GICombineGroup<[extend_through_phis]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
- select_to_logical]>;
+ match_selects]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one, idempotent_prop]>;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
index 8a802515b6f4..35bdd869a88d 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -29,7 +29,6 @@ bool ConstraintSystem::eliminateUsingFM() {
assert(!Constraints.empty() &&
"should only be called for non-empty constraint systems");
- uint32_t NewGCD = 1;
unsigned LastIdx = NumVariables - 1;
// First, either remove the variable in place if it is 0 or add the row to
@@ -96,24 +95,20 @@ bool ConstraintSystem::eliminateUsingFM() {
IdxUpper++;
}
- if (MulOverflow(UpperV, ((-1) * LowerLast / GCD), M1))
+ if (MulOverflow(UpperV, ((-1) * LowerLast), M1))
return false;
if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) {
LowerV = LowerRow[IdxLower].Coefficient;
IdxLower++;
}
- if (MulOverflow(LowerV, (UpperLast / GCD), M2))
+ if (MulOverflow(LowerV, (UpperLast), M2))
return false;
if (AddOverflow(M1, M2, N))
return false;
if (N == 0)
continue;
NR.emplace_back(N, CurrentId);
-
- NewGCD =
- APIntOps::GreatestCommonDivisor({32, (uint32_t)N}, {32, NewGCD})
- .getZExtValue();
}
if (NR.empty())
continue;
@@ -124,7 +119,6 @@ bool ConstraintSystem::eliminateUsingFM() {
}
}
NumVariables -= 1;
- GCD = NewGCD;
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 5beac5547d65..78a833476334 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1189,14 +1189,26 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = simplifyDivRem(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- // If this is an exact divide by a constant, then the dividend (Op0) must have
- // at least as many trailing zeros as the divisor to divide evenly. If it has
- // less trailing zeros, then the result must be poison.
const APInt *DivC;
- if (IsExact && match(Op1, m_APInt(DivC)) && DivC->countr_zero()) {
- KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q);
- if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero())
- return PoisonValue::get(Op0->getType());
+ if (IsExact && match(Op1, m_APInt(DivC))) {
+ // If this is an exact divide by a constant, then the dividend (Op0) must
+ // have at least as many trailing zeros as the divisor to divide evenly. If
+ // it has less trailing zeros, then the result must be poison.
+ if (DivC->countr_zero()) {
+ KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q);
+ if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero())
+ return PoisonValue::get(Op0->getType());
+ }
+
+ // udiv exact (mul nsw X, C), C --> X
+ // sdiv exact (mul nuw X, C), C --> X
+ // where C is not a power of 2.
+ Value *X;
+ if (!DivC->isPowerOf2() &&
+ (Opcode == Instruction::UDiv
+ ? match(Op0, m_NSWMul(m_Value(X), m_Specific(Op1)))
+ : match(Op0, m_NUWMul(m_Value(X), m_Specific(Op1)))))
+ return X;
}
return nullptr;
@@ -4857,14 +4869,12 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
// select ?, poison, X -> X
// select ?, undef, X -> X
if (isa<PoisonValue>(TrueVal) ||
- (Q.isUndefValue(TrueVal) &&
- isGuaranteedNotToBePoison(FalseVal, Q.AC, Q.CxtI, Q.DT)))
+ (Q.isUndefValue(TrueVal) && impliesPoison(FalseVal, Cond)))
return FalseVal;
// select ?, X, poison -> X
// select ?, X, undef -> X
if (isa<PoisonValue>(FalseVal) ||
- (Q.isUndefValue(FalseVal) &&
- isGuaranteedNotToBePoison(TrueVal, Q.AC, Q.CxtI, Q.DT)))
+ (Q.isUndefValue(FalseVal) && impliesPoison(TrueVal, Cond)))
return TrueVal;
// Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC''
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index 89cc7ea15ec1..360fc594ef7c 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -434,6 +434,28 @@ class LazyValueInfoImpl {
void solve();
+ // For the following methods, if UseBlockValue is true, the function may
+ // push additional values to the worklist and return nullopt. If
+ // UseBlockValue is false, it will never return nullopt.
+
+ std::optional<ValueLatticeElement>
+ getValueFromSimpleICmpCondition(CmpInst::Predicate Pred, Value *RHS,
+ const APInt &Offset, Instruction *CxtI,
+ bool UseBlockValue);
+
+ std::optional<ValueLatticeElement>
+ getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest,
+ bool UseBlockValue);
+
+ std::optional<ValueLatticeElement>
+ getValueFromCondition(Value *Val, Value *Cond, bool IsTrueDest,
+ bool UseBlockValue, unsigned Depth = 0);
+
+ std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
+ BasicBlock *BBFrom,
+ BasicBlock *BBTo,
+ bool UseBlockValue);
+
public:
/// This is the query interface to determine the lattice value for the
/// specified Value* at the context instruction (if specified) or at the
@@ -755,14 +777,10 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
return Result;
}
-static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond,
- bool isTrueDest = true,
- unsigned Depth = 0);
-
// If we can determine a constraint on the value given conditions assumed by
// the program, intersect those constraints with BBLV
void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
- Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) {
+ Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) {
BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
if (!BBI)
return;
@@ -779,17 +797,21 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
if (I->getParent() != BB || !isValidAssumeForContext(I, BBI))
continue;
- BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0)));
+ BBLV = intersect(BBLV, *getValueFromCondition(Val, I->getArgOperand(0),
+ /*IsTrueDest*/ true,
+ /*UseBlockValue*/ false));
}
// If guards are not used in the module, don't spend time looking for them
if (GuardDecl && !GuardDecl->use_empty() &&
BBI->getIterator() != BB->begin()) {
- for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()),
- BB->rend())) {
+ for (Instruction &I :
+ make_range(std::next(BBI->getIterator().getReverse()), BB->rend())) {
Value *Cond = nullptr;
if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
- BBLV = intersect(BBLV, getValueFromCondition(Val, Cond));
+ BBLV = intersect(BBLV,
+ *getValueFromCondition(Val, Cond, /*IsTrueDest*/ true,
+ /*UseBlockValue*/ false));
}
}
@@ -886,10 +908,14 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
// If the value is undef, a different value may be chosen in
// the select condition.
if (isGuaranteedNotToBeUndef(Cond, AC)) {
- TrueVal = intersect(TrueVal,
- getValueFromCondition(SI->getTrueValue(), Cond, true));
- FalseVal = intersect(
- FalseVal, getValueFromCondition(SI->getFalseValue(), Cond, false));
+ TrueVal =
+ intersect(TrueVal, *getValueFromCondition(SI->getTrueValue(), Cond,
+ /*IsTrueDest*/ true,
+ /*UseBlockValue*/ false));
+ FalseVal =
+ intersect(FalseVal, *getValueFromCondition(SI->getFalseValue(), Cond,
+ /*IsTrueDest*/ false,
+ /*UseBlockValue*/ false));
}
ValueLatticeElement Result = TrueVal;
@@ -950,9 +976,11 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
// lets us pick up facts from expressions like "and i32 (call i32
// @foo()), 32"
std::optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB);
+ if (!LHSRes)
+ return std::nullopt;
+
std::optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB);
- if (!LHSRes || !RHSRes)
- // More work to do before applying this transfer rule.
+ if (!RHSRes)
return std::nullopt;
const ConstantRange &LHSRange = *LHSRes;
@@ -1068,15 +1096,26 @@ static bool matchICmpOperand(APInt &Offset, Value *LHS, Value *Val,
}
/// Get value range for a "(Val + Offset) Pred RHS" condition.
-static ValueLatticeElement getValueFromSimpleICmpCondition(
- CmpInst::Predicate Pred, Value *RHS, const APInt &Offset) {
+std::optional<ValueLatticeElement>
+LazyValueInfoImpl::getValueFromSimpleICmpCondition(CmpInst::Predicate Pred,
+ Value *RHS,
+ const APInt &Offset,
+ Instruction *CxtI,
+ bool UseBlockValue) {
ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
/*isFullSet=*/true);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
RHSRange = ConstantRange(CI->getValue());
- else if (Instruction *I = dyn_cast<Instruction>(RHS))
+ } else if (UseBlockValue) {
+ std::optional<ValueLatticeElement> R =
+ getBlockValue(RHS, CxtI->getParent(), CxtI);
+ if (!R)
+ return std::nullopt;
+ RHSRange = toConstantRange(*R, RHS->getType());
+ } else if (Instruction *I = dyn_cast<Instruction>(RHS)) {
if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
RHSRange = getConstantRangeFromMetadata(*Ranges);
+ }
ConstantRange TrueValues =
ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
@@ -1103,8 +1142,8 @@ getRangeViaSLT(CmpInst::Predicate Pred, APInt RHS,
return std::nullopt;
}
-static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
- bool isTrueDest) {
+std::optional<ValueLatticeElement> LazyValueInfoImpl::getValueFromICmpCondition(
+ Value *Val, ICmpInst *ICI, bool isTrueDest, bool UseBlockValue) {
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
@@ -1128,11 +1167,13 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
unsigned BitWidth = Ty->getScalarSizeInBits();
APInt Offset(BitWidth, 0);
if (matchICmpOperand(Offset, LHS, Val, EdgePred))
- return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset);
+ return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset, ICI,
+ UseBlockValue);
CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(EdgePred);
if (matchICmpOperand(Offset, RHS, Val, SwappedPred))
- return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset);
+ return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset, ICI,
+ UseBlockValue);
const APInt *Mask, *C;
if (match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) &&
@@ -1212,10 +1253,12 @@ static ValueLatticeElement getValueFromOverflowCondition(
return ValueLatticeElement::getRange(NWR);
}
-static ValueLatticeElement getValueFromCondition(
- Value *Val, Value *Cond, bool IsTrueDest, unsigned Depth) {
+std::optional<ValueLatticeElement>
+LazyValueInfoImpl::getValueFromCondition(Value *Val, Value *Cond,
+ bool IsTrueDest, bool UseBlockValue,
+ unsigned Depth) {
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
- return getValueFromICmpCondition(Val, ICI, IsTrueDest);
+ return getValueFromICmpCondition(Val, ICI, IsTrueDest, UseBlockValue);
if (auto *EVI = dyn_cast<ExtractValueInst>(Cond))
if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
@@ -1227,7 +1270,7 @@ static ValueLatticeElement getValueFromCondition(
Value *N;
if (match(Cond, m_Not(m_Value(N))))
- return getValueFromCondition(Val, N, !IsTrueDest, Depth);
+ return getValueFromCondition(Val, N, !IsTrueDest, UseBlockValue, Depth);
Value *L, *R;
bool IsAnd;
@@ -1238,19 +1281,25 @@ static ValueLatticeElement getValueFromCondition(
else
return ValueLatticeElement::getOverdefined();
- ValueLatticeElement LV = getValueFromCondition(Val, L, IsTrueDest, Depth);
- ValueLatticeElement RV = getValueFromCondition(Val, R, IsTrueDest, Depth);
+ std::optional<ValueLatticeElement> LV =
+ getValueFromCondition(Val, L, IsTrueDest, UseBlockValue, Depth);
+ if (!LV)
+ return std::nullopt;
+ std::optional<ValueLatticeElement> RV =
+ getValueFromCondition(Val, R, IsTrueDest, UseBlockValue, Depth);
+ if (!RV)
+ return std::nullopt;
// if (L && R) -> intersect L and R
// if (!(L || R)) -> intersect !L and !R
// if (L || R) -> union L and R
// if (!(L && R)) -> union !L and !R
if (IsTrueDest ^ IsAnd) {
- LV.mergeIn(RV);
- return LV;
+ LV->mergeIn(*RV);
+ return *LV;
}
- return intersect(LV, RV);
+ return intersect(*LV, *RV);
}
// Return true if Usr has Op as an operand, otherwise false.
@@ -1302,8 +1351,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
}
/// Compute the value of Val on the edge BBFrom -> BBTo.
-static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo) {
+std::optional<ValueLatticeElement>
+LazyValueInfoImpl::getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, bool UseBlockValue) {
// TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
@@ -1324,13 +1374,16 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- ValueLatticeElement Result = getValueFromCondition(Val, Condition,
- isTrueDest);
- if (!Result.isOverdefined())
+ std::optional<ValueLatticeElement> Result =
+ getValueFromCondition(Val, Condition, isTrueDest, UseBlockValue);
+ if (!Result)
+ return std::nullopt;
+
+ if (!Result->isOverdefined())
return Result;
if (User *Usr = dyn_cast<User>(Val)) {
- assert(Result.isOverdefined() && "Result isn't overdefined");
+ assert(Result->isOverdefined() && "Result isn't overdefined");
// Check with isOperationFoldable() first to avoid linearly iterating
// over the operands unnecessarily which can be expensive for
// instructions with many operands.
@@ -1356,8 +1409,8 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// br i1 %Condition, label %then, label %else
for (unsigned i = 0; i < Usr->getNumOperands(); ++i) {
Value *Op = Usr->getOperand(i);
- ValueLatticeElement OpLatticeVal =
- getValueFromCondition(Op, Condition, isTrueDest);
+ ValueLatticeElement OpLatticeVal = *getValueFromCondition(
+ Op, Condition, isTrueDest, /*UseBlockValue*/ false);
if (std::optional<APInt> OpConst =
OpLatticeVal.asConstantInteger()) {
Result = constantFoldUser(Usr, Op, *OpConst, DL);
@@ -1367,7 +1420,7 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
}
}
}
- if (!Result.isOverdefined())
+ if (!Result->isOverdefined())
return Result;
}
}
@@ -1432,8 +1485,12 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (Constant *VC = dyn_cast<Constant>(Val))
return ValueLatticeElement::get(VC);
- ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo);
- if (hasSingleValue(LocalResult))
+ std::optional<ValueLatticeElement> LocalResult =
+ getEdgeValueLocal(Val, BBFrom, BBTo, /*UseBlockValue*/ true);
+ if (!LocalResult)
+ return std::nullopt;
+
+ if (hasSingleValue(*LocalResult))
// Can't get any more precise here
return LocalResult;
@@ -1453,7 +1510,7 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// but then the result is not cached.
intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI);
- return intersect(LocalResult, InBlock);
+ return intersect(*LocalResult, InBlock);
}
ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
@@ -1499,10 +1556,12 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
std::optional<ValueLatticeElement> Result =
getEdgeValue(V, FromBB, ToBB, CxtI);
- if (!Result) {
+ while (!Result) {
+ // As the worklist only explicitly tracks block values (but not edge values)
+ // we may have to call solve() multiple times, as the edge value calculation
+ // may request additional block values.
solve();
Result = getEdgeValue(V, FromBB, ToBB, CxtI);
- assert(Result && "More work to do after problem solved?");
}
LLVM_DEBUG(dbgs() << " Result = " << *Result << "\n");
@@ -1528,13 +1587,17 @@ ValueLatticeElement LazyValueInfoImpl::getValueAtUse(const Use &U) {
if (!isGuaranteedNotToBeUndef(SI->getCondition(), AC))
break;
if (CurrU->getOperandNo() == 1)
- CondVal = getValueFromCondition(V, SI->getCondition(), true);
+ CondVal =
+ *getValueFromCondition(V, SI->getCondition(), /*IsTrueDest*/ true,
+ /*UseBlockValue*/ false);
else if (CurrU->getOperandNo() == 2)
- CondVal = getValueFromCondition(V, SI->getCondition(), false);
+ CondVal =
+ *getValueFromCondition(V, SI->getCondition(), /*IsTrueDest*/ false,
+ /*UseBlockValue*/ false);
} else if (auto *PHI = dyn_cast<PHINode>(CurrI)) {
// TODO: Use non-local query?
- CondVal =
- getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent());
+ CondVal = *getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU),
+ PHI->getParent(), /*UseBlockValue*/ false);
}
if (CondVal)
VL = intersect(VL, *CondVal);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3f76dfdaac31..67246afa2314 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -862,6 +862,15 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
return Cost;
}
+InstructionCost TargetTransformInfo::getAltInstrCost(
+ VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const {
+ InstructionCost Cost =
+ TTIImpl->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost TargetTransformInfo::getShuffleCost(
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
index cac2602d455f..16d78c1ded6d 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
@@ -983,45 +983,11 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
case Instruction::Select: {
- const Value *LHS = nullptr, *RHS = nullptr;
- SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
- if (SelectPatternResult::isMinOrMax(SPF)) {
- computeKnownBits(RHS, Known, Depth + 1, Q);
- computeKnownBits(LHS, Known2, Depth + 1, Q);
- switch (SPF) {
- default:
- llvm_unreachable("Unhandled select pattern flavor!");
- case SPF_SMAX:
- Known = KnownBits::smax(Known, Known2);
- break;
- case SPF_SMIN:
- Known = KnownBits::smin(Known, Known2);
- break;
- case SPF_UMAX:
- Known = KnownBits::umax(Known, Known2);
- break;
- case SPF_UMIN:
- Known = KnownBits::umin(Known, Known2);
- break;
- }
- break;
- }
-
computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
// Only known if known in both the LHS and RHS.
Known = Known.intersectWith(Known2);
-
- if (SPF == SPF_ABS) {
- // RHS from matchSelectPattern returns the negation part of abs pattern.
- // If the negate has an NSW flag we can assume the sign bit of the result
- // will be 0 because that makes abs(INT_MIN) undefined.
- if (match(RHS, m_Neg(m_Specific(LHS))) &&
- Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS)))
- Known.Zero.setSignBit();
- }
-
break;
}
case Instruction::FPTrunc:
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
index f90fca9d937f..5b57f0a25cec 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp
@@ -123,6 +123,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
int OpdIdx) {
+ assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");
+
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 8907f6fa4ff3..a027d0c21ba0 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -4218,6 +4218,9 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
// Check whether we have enough values to read a partition name.
if (OpNum + 1 < Record.size()) {
+      // Check that Strtab is large enough to hold the partition name.
+ if (Record[OpNum] + Record[OpNum + 1] > Strtab.size())
+ return error("Malformed partition, too large.");
NewGA->setPartition(
StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1]));
OpNum += 2;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 91a64d59e154..8b15bdb0aca3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5940,62 +5940,6 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
return false;
}
-bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- GSelect &Sel = cast<GSelect>(MI);
- Register DstReg = Sel.getReg(0);
- Register Cond = Sel.getCondReg();
- Register TrueReg = Sel.getTrueReg();
- Register FalseReg = Sel.getFalseReg();
-
- auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
- auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
-
- const LLT CondTy = MRI.getType(Cond);
- const LLT OpTy = MRI.getType(TrueReg);
- if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
- return false;
-
- // We have a boolean select.
-
- // select Cond, Cond, F --> or Cond, F
- // select Cond, 1, F --> or Cond, F
- auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
- if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildOr(DstReg, Cond, FalseReg);
- };
- return true;
- }
-
- // select Cond, T, Cond --> and Cond, T
- // select Cond, T, 0 --> and Cond, T
- auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
- if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildAnd(DstReg, Cond, TrueReg);
- };
- return true;
- }
-
- // select Cond, T, 1 --> or (not Cond), T
- if (MaybeCstFalse && MaybeCstFalse->isOne()) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
- };
- return true;
- }
-
- // select Cond, 0, F --> and (not Cond), F
- if (MaybeCstTrue && MaybeCstTrue->isZero()) {
- MatchInfo = [=](MachineIRBuilder &MIB) {
- MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
- };
- return true;
- }
- return false;
-}
-
bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
unsigned &IdxToPropagate) {
bool PropagateNaN;
@@ -6318,3 +6262,300 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
MI.getOperand(2).setReg(LHSReg);
Observer.changedInstr(MI);
}
+
+bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
+ LLT SrcTy = MRI.getType(Src);
+ if (SrcTy.isFixedVector())
+ return isConstantSplatVector(Src, 1, AllowUndefs);
+ if (SrcTy.isScalar()) {
+ if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
+ return true;
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ return IConstant && IConstant->Value == 1;
+ }
+ return false; // scalable vector
+}
+
+bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
+ LLT SrcTy = MRI.getType(Src);
+ if (SrcTy.isFixedVector())
+ return isConstantSplatVector(Src, 0, AllowUndefs);
+ if (SrcTy.isScalar()) {
+ if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
+ return true;
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ return IConstant && IConstant->Value == 0;
+ }
+ return false; // scalable vector
+}
+
+// Ignores COPYs during conformance checks.
+// FIXME scalable vectors.
+bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
+ bool AllowUndefs) {
+ GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BuildVector)
+ return false;
+ unsigned NumSources = BuildVector->getNumSources();
+
+ for (unsigned I = 0; I < NumSources; ++I) {
+ GImplicitDef *ImplicitDef =
+ getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
+ if (ImplicitDef && AllowUndefs)
+ continue;
+ if (ImplicitDef && !AllowUndefs)
+ return false;
+ std::optional<ValueAndVReg> IConstant =
+ getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
+ if (IConstant && IConstant->Value == SplatValue)
+ continue;
+ return false;
+ }
+ return true;
+}
+
+// Ignores COPYs during lookups.
+// FIXME scalable vectors
+std::optional<APInt>
+CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
+ auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
+ if (IConstant)
+ return IConstant->Value;
+
+ GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BuildVector)
+ return std::nullopt;
+ unsigned NumSources = BuildVector->getNumSources();
+
+ std::optional<APInt> Value = std::nullopt;
+ for (unsigned I = 0; I < NumSources; ++I) {
+ std::optional<ValueAndVReg> IConstant =
+ getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
+ if (!IConstant)
+ return std::nullopt;
+ if (!Value)
+ Value = IConstant->Value;
+ else if (*Value != IConstant->Value)
+ return std::nullopt;
+ }
+ return Value;
+}
+
+// TODO: use knownbits to determine zeros
+bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
+ BuildFnTy &MatchInfo) {
+ uint32_t Flags = Select->getFlags();
+ Register Dest = Select->getReg(0);
+ Register Cond = Select->getCondReg();
+ Register True = Select->getTrueReg();
+ Register False = Select->getFalseReg();
+ LLT CondTy = MRI.getType(Select->getCondReg());
+ LLT TrueTy = MRI.getType(Select->getTrueReg());
+
+ // We only do this combine for scalar boolean conditions.
+ if (CondTy != LLT::scalar(1))
+ return false;
+
+ // Both are scalars.
+ std::optional<ValueAndVReg> TrueOpt =
+ getIConstantVRegValWithLookThrough(True, MRI);
+ std::optional<ValueAndVReg> FalseOpt =
+ getIConstantVRegValWithLookThrough(False, MRI);
+
+ if (!TrueOpt || !FalseOpt)
+ return false;
+
+ APInt TrueValue = TrueOpt->Value;
+ APInt FalseValue = FalseOpt->Value;
+
+ // select Cond, 1, 0 --> zext (Cond)
+ if (TrueValue.isOne() && FalseValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ B.buildZExtOrTrunc(Dest, Cond);
+ };
+ return true;
+ }
+
+ // select Cond, -1, 0 --> sext (Cond)
+ if (TrueValue.isAllOnes() && FalseValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ B.buildSExtOrTrunc(Dest, Cond);
+ };
+ return true;
+ }
+
+ // select Cond, 0, 1 --> zext (!Cond)
+ if (TrueValue.isZero() && FalseValue.isOne()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ B.buildZExtOrTrunc(Dest, Inner);
+ };
+ return true;
+ }
+
+ // select Cond, 0, -1 --> sext (!Cond)
+ if (TrueValue.isZero() && FalseValue.isAllOnes()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ B.buildSExtOrTrunc(Dest, Inner);
+ };
+ return true;
+ }
+
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (TrueValue - 1 == FalseValue) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Inner, Cond);
+ B.buildAdd(Dest, Inner, False);
+ };
+ return true;
+ }
+
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (TrueValue + 1 == FalseValue) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildSExtOrTrunc(Inner, Cond);
+ B.buildAdd(Dest, Inner, False);
+ };
+ return true;
+ }
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Inner, Cond);
+ // The shift amount must be scalar.
+ LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
+ auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
+ B.buildShl(Dest, Inner, ShAmtC, Flags);
+ };
+ return true;
+ }
+ // select Cond, -1, C --> or (sext Cond), C
+ if (TrueValue.isAllOnes()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildSExtOrTrunc(Inner, Cond);
+ B.buildOr(Dest, Inner, False, Flags);
+ };
+ return true;
+ }
+
+ // select Cond, C, -1 --> or (sext (not Cond)), C
+ if (FalseValue.isAllOnes()) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Not = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Not, Cond);
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildSExtOrTrunc(Inner, Not);
+ B.buildOr(Dest, Inner, True, Flags);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: use knownbits to determine zeros
+bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
+ BuildFnTy &MatchInfo) {
+ uint32_t Flags = Select->getFlags();
+ Register DstReg = Select->getReg(0);
+ Register Cond = Select->getCondReg();
+ Register True = Select->getTrueReg();
+ Register False = Select->getFalseReg();
+ LLT CondTy = MRI.getType(Select->getCondReg());
+ LLT TrueTy = MRI.getType(Select->getTrueReg());
+
+ // Boolean or fixed vector of booleans.
+ if (CondTy.isScalableVector() ||
+ (CondTy.isFixedVector() &&
+ CondTy.getElementType().getScalarSizeInBits() != 1) ||
+ CondTy.getScalarSizeInBits() != 1)
+ return false;
+
+ if (CondTy != TrueTy)
+ return false;
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Cond);
+ B.buildOr(DstReg, Ext, False, Flags);
+ };
+ return true;
+ }
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Cond);
+ B.buildAnd(DstReg, Ext, True);
+ };
+ return true;
+ }
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ // First the not.
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ // Then an ext to match the destination register.
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Inner);
+ B.buildOr(DstReg, Ext, True, Flags);
+ };
+ return true;
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*Select);
+ // First the not.
+ Register Inner = MRI.createGenericVirtualRegister(CondTy);
+ B.buildNot(Inner, Cond);
+ // Then an ext to match the destination register.
+ Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+ B.buildZExtOrTrunc(Ext, Inner);
+ B.buildAnd(DstReg, Ext, False);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GSelect *Select = cast<GSelect>(&MI);
+
+ if (tryFoldSelectOfConstants(Select, MatchInfo))
+ return true;
+
+ if (tryFoldBoolSelectToLogic(Select, MatchInfo))
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index a032b31a1fc7..51e944d0279f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -175,8 +175,46 @@ public:
if (MachineInstr *MI = I->second.MI) {
std::optional<DestSourcePair> CopyOperands =
isCopyInstr(*MI, TII, UseCopyInstr);
- markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
- TRI);
+
+ MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
+ MCRegister Src = CopyOperands->Source->getReg().asMCReg();
+
+ markRegsUnavailable(Def, TRI);
+
+    // Since we clobber the destination of a copy, the semantics of Src's
+    // "DefRegs" containing Def no longer hold. We will also need to remove
+    // the record from the copy maps that indicates Src defined Def. Failing
+    // to do so might cause the target to miss some opportunities to further
+    // eliminate redundant copy instructions.
+ // Consider the following sequence during the
+ // ForwardCopyPropagateBlock procedure:
+ // L1: r0 = COPY r9 <- TrackMI
+ // L2: r0 = COPY r8 <- TrackMI (Remove r9 defined r0 from tracker)
+ // L3: use r0 <- Remove L2 from MaybeDeadCopies
+ // L4: early-clobber r9 <- Clobber r9 (L2 is still valid in tracker)
+ // L5: r0 = COPY r8 <- Remove NopCopy
+ for (MCRegUnit SrcUnit : TRI.regunits(Src)) {
+ auto SrcCopy = Copies.find(SrcUnit);
+ if (SrcCopy != Copies.end() && SrcCopy->second.LastSeenUseInCopy) {
+ // If SrcCopy defines multiple values, we only need
+ // to erase the record for Def in DefRegs.
+ for (auto itr = SrcCopy->second.DefRegs.begin();
+ itr != SrcCopy->second.DefRegs.end(); itr++) {
+ if (*itr == Def) {
+ SrcCopy->second.DefRegs.erase(itr);
+            // If DefRegs becomes empty after the removal, we can remove the
+            // SrcCopy from the tracker's copy maps. We only remove entries
+            // that solely record that Def is defined by Src. If an entry
+            // also contains definition records for other destination
+            // registers, it cannot be cleared.
+ if (SrcCopy->second.DefRegs.empty() && !SrcCopy->second.MI) {
+ Copies.erase(SrcCopy);
+ }
+ break;
+ }
+ }
+ }
+ }
}
// Now we can erase the copy.
Copies.erase(I);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0d46c7868d87..eafa95ce7fcf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -546,6 +546,7 @@ namespace {
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFP_TO_BF16(SDNode *N);
+ SDValue visitBF16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitVPOp(SDNode *N);
SDValue visitGET_FPENV_MEM(SDNode *N);
@@ -2047,6 +2048,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
+ case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
case ISD::FREEZE: return visitFREEZE(N);
case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
@@ -26256,14 +26258,17 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
}
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ auto Op = N->getOpcode();
+ assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
+ "opcode should be FP16_TO_FP or BF16_TO_FP.");
SDValue N0 = N->getOperand(0);
- // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
+ // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
if (AndConst && AndConst->getAPIntValue() == 0xffff) {
- return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
- N0.getOperand(0));
+ return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
}
}
@@ -26280,6 +26285,11 @@ SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
+ // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
+ return visitFP16_TO_FP(N);
+}
+
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a27febe15db8..34fa1f5a7ed1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -495,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
// constraints on the %dst register, COPY can target all legal register
// classes.
- unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned SubIdx = Node->getConstantOperandVal(1);
const TargetRegisterClass *TRC =
TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
@@ -611,7 +611,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
// Create the new VReg in the destination class and emit a copy.
- unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned DstRCIdx = Node->getConstantOperandVal(1);
const TargetRegisterClass *DstRC =
TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
Register NewVReg = MRI->createVirtualRegister(DstRC);
@@ -629,7 +629,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
void InstrEmitter::EmitRegSequence(SDNode *Node,
DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned) {
- unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ unsigned DstRCIdx = Node->getConstantOperandVal(0);
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
@@ -1309,8 +1309,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- unsigned Flags =
- cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned Flags = Node->getConstantOperandVal(i);
const InlineAsm::Flag F(Flags);
const unsigned NumVals = F.getNumOperandRegisters();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index f73ddfee2b90..e3acb58327a8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -492,8 +492,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
--NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- unsigned Flags =
- cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned Flags = Node->getConstantOperandVal(i);
const InlineAsm::Flag F(Flags);
unsigned NumVals = F.getNumOperandRegisters();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 47c137d2bcad..dcecb2e0e7fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -331,7 +331,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned Opcode = Node->getMachineOpcode();
if (Opcode == TargetOpcode::REG_SEQUENCE) {
- unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ unsigned DstRCIdx = Node->getConstantOperandVal(0);
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
RegClass = RC->getID();
Cost = RegSequenceCost;
@@ -1369,8 +1369,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
--NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- unsigned Flags =
- cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned Flags = Node->getConstantOperandVal(i);
const InlineAsm::Flag F(Flags);
unsigned NumVals = F.getNumOperandRegisters();
@@ -2298,8 +2297,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
continue;
}
if (POpc == TargetOpcode::REG_SEQUENCE) {
- unsigned DstRCIdx =
- cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
+ unsigned DstRCIdx = PN->getConstantOperandVal(0);
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
unsigned RCId = RC->getID();
// REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 81facf92e55a..0e17bba2398e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5470,7 +5470,7 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
Ops[i].getOperand(0).getValueType() != VT ||
(IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) ||
!isa<ConstantSDNode>(Ops[i].getOperand(1)) ||
- cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) {
+ Ops[i].getConstantOperandAPInt(1) != i) {
IsIdentity = false;
break;
}
@@ -7408,7 +7408,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
Src.getOperand(1).getOpcode() == ISD::Constant) {
G = cast<GlobalAddressSDNode>(Src.getOperand(0));
- SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ SrcDelta = Src.getConstantOperandVal(1);
}
if (!G)
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3dc6e4bbcf46..f28211ac113c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -4181,8 +4181,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
Msg << "\nIn function: " << MF->getName();
} else {
bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
- unsigned iid =
- cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ unsigned iid = N->getConstantOperandVal(HasInputChain);
if (iid < Intrinsic::num_intrinsics)
Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid);
else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp
index f65ec27ff875..5a058bd712a3 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp
@@ -105,8 +105,7 @@ llvm::orc::createDWARFContext(LinkGraph &G) {
auto SecData = getSectionData(Sec);
auto Name = Sec.getName();
// DWARFContext expects the section name to not start with a dot
- if (Name.starts_with("."))
- Name = Name.drop_front();
+ Name.consume_front(".");
LLVM_DEBUG(dbgs() << "Creating DWARFContext section " << Name
<< " with size " << SecData.size() << "\n");
DWARFSectionData[Name] =
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index a19e17029810..e259c393d07e 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -768,11 +768,11 @@ Error LLJITBuilderState::prepareForConstruction() {
// create a default one.
if (!SetupProcessSymbolsJITDylib && LinkProcessSymbolsByDefault) {
LLVM_DEBUG(dbgs() << "Creating default Process JD setup function\n");
- SetupProcessSymbolsJITDylib = [this](LLJIT &J) -> Expected<JITDylibSP> {
+ SetupProcessSymbolsJITDylib = [](LLJIT &J) -> Expected<JITDylibSP> {
auto &JD =
J.getExecutionSession().createBareJITDylib("<Process Symbols>");
- auto G = orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
- DL->getGlobalPrefix());
+ auto G = EPCDynamicLibrarySearchGenerator::GetForTargetProcess(
+ J.getExecutionSession());
if (!G)
return G.takeError();
JD.addGenerator(std::move(*G));
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ce428f78dc84..f6cf358119fb 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6026,6 +6026,17 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
bool IsFailOnly) {
+ AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+ return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr,
+ IsPostfixUpdate, IsFailOnly, Failure);
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
+ const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
+ AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
+ omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
+ bool IsFailOnly, AtomicOrdering Failure) {
+
if (!updateToLocation(Loc))
return Loc.IP;
@@ -6040,7 +6051,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
bool IsInteger = E->getType()->isIntegerTy();
if (Op == OMPAtomicCompareOp::EQ) {
- AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
AtomicCmpXchgInst *Result = nullptr;
if (!IsInteger) {
IntegerType *IntCastTy =
diff --git a/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp
index c64e9c04e199..58e4b74f4b22 100644
--- a/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/contrib/llvm-project/llvm/lib/FuzzMutate/FuzzerCLI.cpp
@@ -86,13 +86,12 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
Args.push_back("-passes=gvn");
} else if (Opt == "sccp") {
Args.push_back("-passes=sccp");
-
} else if (Opt == "loop_predication") {
Args.push_back("-passes=loop-predication");
} else if (Opt == "guard_widening") {
Args.push_back("-passes=guard-widening");
} else if (Opt == "loop_rotate") {
- Args.push_back("-passes=loop(rotate)");
+ Args.push_back("-passes=loop-rotate");
} else if (Opt == "loop_unswitch") {
Args.push_back("-passes=loop(simple-loop-unswitch)");
} else if (Opt == "loop_unroll") {
@@ -107,7 +106,18 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) {
Args.push_back("-passes=loop-reduce");
} else if (Opt == "irce") {
Args.push_back("-passes=irce");
-
+ } else if (Opt == "dse") {
+ Args.push_back("-passes=dse");
+ } else if (Opt == "loop_idiom") {
+ Args.push_back("-passes=loop-idiom");
+ } else if (Opt == "reassociate") {
+ Args.push_back("-passes=reassociate");
+ } else if (Opt == "lower_matrix_intrinsics") {
+ Args.push_back("-passes=lower-matrix-intrinsics");
+ } else if (Opt == "memcpyopt") {
+ Args.push_back("-passes=memcpyopt");
+ } else if (Opt == "sroa") {
+ Args.push_back("-passes=sroa");
} else if (Triple(Opt).getArch()) {
Args.push_back("-mtriple=" + Opt.str());
} else {
diff --git a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
index eab05eed428e..c6dc42e8ac88 100644
--- a/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DebugInfo.cpp
@@ -2115,6 +2115,10 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
if (F.hasFnAttribute(Attribute::OptimizeNone))
return /*Changed*/ false;
+ // FIXME: https://github.com/llvm/llvm-project/issues/76545
+ if (F.hasFnAttribute(Attribute::SanitizeHWAddress))
+ return /*Changed*/ false;
+
bool Changed = false;
auto *DL = &F.getParent()->getDataLayout();
// Collect a map of {backing storage : dbg.declares} (currently "backing
diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
index fd48d5080ff6..e43f111113b4 100644
--- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1526,8 +1526,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
StringRef Name = SectionName;
// For user-defined custom sections, strip the prefix
- if (Name.starts_with(".custom_section."))
- Name = Name.substr(strlen(".custom_section."));
+ Name.consume_front(".custom_section.");
MCSymbol *Begin = Sec.getBeginSymbol();
if (Begin) {
diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
index dfe86a45df32..ccc29d0cb73d 100644
--- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp
@@ -1484,6 +1484,11 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
}
uint32_t BodySize = FunctionEnd - Ctx.Ptr;
+ // Ensure that Function is within Ctx's buffer.
+ if (Ctx.Ptr + BodySize > Ctx.End) {
+ return make_error<GenericBinaryError>("Function extends beyond buffer",
+ object_error::parse_failed);
+ }
Function.Body = ArrayRef<uint8_t>(Ctx.Ptr, BodySize);
// This will be set later when reading in the linking metadata section.
Function.Comdat = UINT32_MAX;
@@ -1662,10 +1667,18 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
auto &Sym = getWasmSymbol(Symb);
if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION &&
- isDefinedFunctionIndex(Sym.Info.ElementIndex))
- return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset;
- else
- return getSymbolValue(Symb);
+ isDefinedFunctionIndex(Sym.Info.ElementIndex)) {
+ // For object files, use the section offset. The linker relies on this.
+ // For linked files, use the file offset. This behavior matches the way
+ // browsers print stack traces and is useful for binary size analysis.
+ // (see https://webassembly.github.io/spec/web-api/index.html#conventions)
+ uint32_t Adjustment = isRelocatableObject() || isSharedObject()
+ ? 0
+ : Sections[CodeSection].Offset;
+ return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset +
+ Adjustment;
+ }
+ return getSymbolValue(Symb);
}
uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
index 8f62df79d5b7..b547cf7181b1 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -539,7 +539,7 @@ Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
const IntPtrT FPtr = swap(I->FunctionPointer);
if (!FPtr)
continue;
- Symtab.mapAddress(FPtr, I->NameRef);
+ Symtab.mapAddress(FPtr, swap(I->NameRef));
}
return success();
}
diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
index 7256e9a29329..a9b7e209915a 100644
--- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp
@@ -75,7 +75,6 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"xcvmac", RISCVExtensionVersion{1, 0}},
{"xcvmem", RISCVExtensionVersion{1, 0}},
{"xcvsimd", RISCVExtensionVersion{1, 0}},
- {"xsfcie", RISCVExtensionVersion{1, 0}},
{"xsfvcp", RISCVExtensionVersion{1, 0}},
{"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}},
{"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}},
@@ -191,11 +190,17 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
{"zacas", RISCVExtensionVersion{1, 0}},
+ {"zcmop", RISCVExtensionVersion{0, 2}},
+
{"zfbfmin", RISCVExtensionVersion{0, 8}},
{"zicfilp", RISCVExtensionVersion{0, 4}},
+ {"zicfiss", RISCVExtensionVersion{0, 4}},
+
{"zicond", RISCVExtensionVersion{1, 0}},
+ {"zimop", RISCVExtensionVersion{0, 1}},
+
{"ztso", RISCVExtensionVersion{0, 1}},
{"zvfbfmin", RISCVExtensionVersion{0, 8}},
@@ -1006,6 +1011,7 @@ static const char *ImpliedExtsZcb[] = {"zca"};
static const char *ImpliedExtsZcd[] = {"d", "zca"};
static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"};
static const char *ImpliedExtsZcf[] = {"f", "zca"};
+static const char *ImpliedExtsZcmop[] = {"zca"};
static const char *ImpliedExtsZcmp[] = {"zca"};
static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"};
static const char *ImpliedExtsZdinx[] = {"zfinx"};
@@ -1017,6 +1023,7 @@ static const char *ImpliedExtsZfinx[] = {"zicsr"};
static const char *ImpliedExtsZhinx[] = {"zhinxmin"};
static const char *ImpliedExtsZhinxmin[] = {"zfinx"};
static const char *ImpliedExtsZicntr[] = {"zicsr"};
+static const char *ImpliedExtsZicfiss[] = {"zicsr", "zimop"};
static const char *ImpliedExtsZihpm[] = {"zicsr"};
static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx",
@@ -1078,6 +1085,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"zcd"}, {ImpliedExtsZcd}},
{{"zce"}, {ImpliedExtsZce}},
{{"zcf"}, {ImpliedExtsZcf}},
+ {{"zcmop"}, {ImpliedExtsZcmop}},
{{"zcmp"}, {ImpliedExtsZcmp}},
{{"zcmt"}, {ImpliedExtsZcmt}},
{{"zdinx"}, {ImpliedExtsZdinx}},
@@ -1088,6 +1096,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"zfinx"}, {ImpliedExtsZfinx}},
{{"zhinx"}, {ImpliedExtsZhinx}},
{{"zhinxmin"}, {ImpliedExtsZhinxmin}},
+ {{"zicfiss"}, {ImpliedExtsZicfiss}},
{{"zicntr"}, {ImpliedExtsZicntr}},
{{"zihpm"}, {ImpliedExtsZihpm}},
{{"zk"}, {ImpliedExtsZk}},
diff --git a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
index 168a63bb2d96..2bf68b7972e7 100644
--- a/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm-project/llvm/lib/Support/Windows/Path.inc
@@ -154,7 +154,10 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
return "";
llvm::sys::path::make_preferred(PathNameUTF8);
- return std::string(PathNameUTF8.data());
+
+ SmallString<256> RealPath;
+ sys::fs::real_path(PathNameUTF8, RealPath);
+ return std::string(RealPath);
}
UniqueID file_status::getUniqueID() const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 463ec41b94e9..476d99c2a7e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1950,7 +1950,7 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
unsigned BaseReg, unsigned Op) {
unsigned TileNum = 0;
if (BaseReg != AArch64::ZA)
- TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ TileNum = N->getConstantOperandVal(2);
if (!SelectSMETile(BaseReg, TileNum))
return;
@@ -2145,8 +2145,7 @@ void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
const EVT ResTys[] = {MVT::Untyped, MVT::Other};
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
+ unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
N->getOperand(NumVecs + 3), N->getOperand(0)};
@@ -2185,8 +2184,7 @@ void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
const EVT ResTys[] = {MVT::i64, // Type of the write back register
RegSeq->getValueType(0), MVT::Other};
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
+ unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
SDValue Ops[] = {RegSeq,
CurDAG->getTargetConstant(LaneNo, dl,
@@ -2237,8 +2235,7 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
+ unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
N->getOperand(NumVecs + 3), N->getOperand(0)};
@@ -2269,8 +2266,7 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
const EVT ResTys[] = {MVT::i64, // Type of the write back register
MVT::Other};
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
+ unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
N->getOperand(NumVecs + 2), // Base Register
@@ -2576,8 +2572,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
case AArch64::UBFMXri:
Opc = NOpc;
Opd0 = N->getOperand(0);
- Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+ Immr = N->getConstantOperandVal(1);
+ Imms = N->getConstantOperandVal(2);
return true;
}
// Unreachable
@@ -3877,7 +3873,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
assert(isa<ConstantSDNode>(N->getOperand(2)) &&
"Expected a constant integer expression.");
unsigned Reg = PMapper->Encoding;
- uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ uint64_t Immed = N->getConstantOperandVal(2);
CurDAG->SelectNodeTo(
N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
@@ -4173,8 +4169,7 @@ bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
SDValue IRG_SP = N->getOperand(2);
if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
- cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
- Intrinsic::aarch64_irg_sp) {
+ IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
return false;
}
@@ -4183,7 +4178,7 @@ bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
SDValue FiOp = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
- int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int TagOffset = N->getConstantOperandVal(3);
SDNode *Out = CurDAG->getMachineNode(
AArch64::TAGPstack, DL, MVT::i64,
@@ -4203,7 +4198,7 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
// General case for unrelated pointers in Op1 and Op2.
SDLoc DL(N);
- int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int TagOffset = N->getConstantOperandVal(3);
SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
{N->getOperand(1), N->getOperand(2)});
SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
@@ -4219,7 +4214,7 @@ bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
// Bail when not a "cast" like insert_subvector.
- if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0)
+ if (N->getConstantOperandVal(2) != 0)
return false;
if (!N->getOperand(0).isUndef())
return false;
@@ -4250,7 +4245,7 @@ bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
// Bail when not a "cast" like extract_subvector.
- if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0)
+ if (N->getConstantOperandVal(1) != 0)
return false;
// Bail when normal isel can do the job.
@@ -4422,7 +4417,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
default:
break;
@@ -5179,7 +5174,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
default:
break;
@@ -5782,7 +5777,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
}
case ISD::INTRINSIC_VOID: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
if (Node->getNumOperands() >= 3)
VT = Node->getOperand(2)->getValueType(0);
switch (IntNo) {
@@ -6806,7 +6801,7 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
return EVT();
- switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
+ switch (Root->getConstantOperandVal(1)) {
default:
return EVT();
case Intrinsic::aarch64_sme_ldr:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dffe69bdb900..102fd0c3dae2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2196,7 +2196,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
switch (IntNo) {
default:
break;
@@ -3922,9 +3922,9 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// 4: bool isDataCache
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned IsWrite = Op.getConstantOperandVal(2);
+ unsigned Locality = Op.getConstantOperandVal(3);
+ unsigned IsData = Op.getConstantOperandVal(4);
bool IsStream = !Locality;
// When the locality number is set
@@ -4973,10 +4973,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(2);
- unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- unsigned Locality = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
- unsigned IsStream = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
- unsigned IsData = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IsWrite = Op.getConstantOperandVal(3);
+ unsigned Locality = Op.getConstantOperandVal(4);
+ unsigned IsStream = Op.getConstantOperandVal(5);
+ unsigned IsData = Op.getConstantOperandVal(6);
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
(!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
@@ -5039,7 +5039,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
@@ -5218,8 +5218,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
- return getPTrue(DAG, dl, Op.getValueType(),
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ return getPTrue(DAG, dl, Op.getValueType(), Op.getConstantOperandVal(1));
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -6478,7 +6477,7 @@ static unsigned getIntrinsicID(const SDNode *N) {
default:
return Intrinsic::not_intrinsic;
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
if (IID < Intrinsic::num_intrinsics)
return IID;
return Intrinsic::not_intrinsic;
@@ -10009,7 +10008,7 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
@@ -10076,7 +10075,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -10942,7 +10941,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ unsigned EltNo = V.getConstantOperandVal(1);
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
@@ -13329,7 +13328,7 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
"Only cases that extract a fixed length vector are supported!");
EVT InVT = Op.getOperand(0).getValueType();
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Idx = Op.getConstantOperandVal(1);
unsigned Size = Op.getValueSizeInBits();
// If we don't have legal types yet, do nothing
@@ -13375,7 +13374,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Idx = Op.getConstantOperandVal(2);
SDValue Vec0 = Op.getOperand(0);
SDValue Vec1 = Op.getOperand(1);
@@ -13715,11 +13714,10 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
SplatBitSize, HasAnyUndefs);
- bool IsSplatUniform =
- SrcVT.getVectorElementType().getSizeInBits() >= SplatBitSize;
- bool IsZero = IsCnst && SplatValue == 0 && IsSplatUniform;
- bool IsOne = IsCnst && SplatValue == 1 && IsSplatUniform;
- bool IsMinusOne = IsCnst && SplatValue.isAllOnes() && IsSplatUniform;
+ bool IsZero = IsCnst && SplatValue == 0;
+ bool IsOne =
+ IsCnst && SrcVT.getScalarSizeInBits() == SplatBitSize && SplatValue == 1;
+ bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
@@ -14247,7 +14245,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
assert(VT != MVT::i64 && "Expected illegal VSCALE node");
SDLoc DL(Op);
- APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
+ APInt MulImm = Op.getConstantOperandAPInt(0);
return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL,
VT);
}
@@ -18343,7 +18341,7 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) {
return false;
if (N.getOperand(0).getValueType().isScalableVector())
return false;
- return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
+ return N.getConstantOperandAPInt(1) ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
@@ -18399,8 +18397,8 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
// TODO: we want the operands of the Cmp not the csel
SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
SetCCInfo.IsAArch64 = true;
- SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
- cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+ SetCCInfo.Info.AArch64.CC =
+ static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
// Check that the operands matches the constraints:
// (1) Both operands must be constants.
@@ -21585,7 +21583,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
@@ -22501,7 +22499,7 @@ static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
- unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Bit = N->getConstantOperandVal(2);
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
@@ -23789,7 +23787,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performMULLCombine(N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
@@ -23940,8 +23938,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_rndr:
case Intrinsic::aarch64_rndrrs: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntrinsicID = N->getConstantOperandVal(1);
auto Register =
(IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
: AArch64SysReg::RNDRRS);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index e3220d103ae0..a21b4b77166e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -896,7 +896,7 @@ static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm,
// Immediate is encoded as the top 16-bits of an unsigned 18-bit negative
// PC-relative offset.
uint64_t ImmVal = Imm;
- if (ImmVal < 0 || ImmVal > (1 << 16))
+ if (ImmVal > (1 << 16))
return Fail;
ImmVal = -ImmVal;
if (!Decoder->tryAddingSymbolicOperand(Inst, (ImmVal << 2), Addr,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 8b909f53c844..1d0e8be80d07 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -623,6 +623,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({s32, s64})
.legalFor(PackedVectorAllTypeList)
.maxScalar(0, s64)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(0, s64, 2)
.lower();
// FP conversions
@@ -1406,7 +1410,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_umax:
case Intrinsic::aarch64_neon_umin:
case Intrinsic::aarch64_neon_fmax:
- case Intrinsic::aarch64_neon_fmin: {
+ case Intrinsic::aarch64_neon_fmin:
+ case Intrinsic::aarch64_neon_fmaxnm:
+ case Intrinsic::aarch64_neon_fminnm: {
MachineIRBuilder MIB(MI);
if (IntrinsicID == Intrinsic::aarch64_neon_smax)
MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
@@ -1422,6 +1428,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
{MI.getOperand(2), MI.getOperand(3)});
+ else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
+ MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
+ {MI.getOperand(2), MI.getOperand(3)});
+ else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
+ MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
+ {MI.getOperand(2), MI.getOperand(3)});
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b0eac567ec9f..bffea82ab8f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -377,7 +377,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
return Subtarget->getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
- unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned RCID = N->getConstantOperandVal(0);
const TargetRegisterClass *SuperRC =
Subtarget->getRegisterInfo()->getRegClass(RCID);
@@ -724,7 +724,7 @@ bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
unsigned ShAmtBits) const {
assert(N->getOpcode() == ISD::AND);
- const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ const APInt &RHS = N->getConstantOperandAPInt(1);
if (RHS.countr_one() >= ShAmtBits)
return true;
@@ -2672,7 +2672,7 @@ void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
}
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
- unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntrID = N->getConstantOperandVal(1);
switch (IntrID) {
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
@@ -2690,7 +2690,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
}
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
- unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrID = N->getConstantOperandVal(0);
unsigned Opcode;
switch (IntrID) {
case Intrinsic::amdgcn_wqm:
@@ -2731,7 +2731,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
}
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
- unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntrID = N->getConstantOperandVal(1);
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
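Nearly all of the AMDGPU, ARC and ARM hunks that follow are the same mechanical cleanup: the open-coded cast<ConstantSDNode>(...)->getZExtValue() / getAPIntValue() is replaced by the SDNode/SDValue convenience accessors. A minimal sketch of the equivalence (hypothetical helper; operand Idx is assumed to be a ConstantSDNode):

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cassert>
using namespace llvm;

static uint64_t constantOperand(const SDNode *N, unsigned Idx) {
  // Both forms return the same value; the accessor hides the cast and asserts
  // internally that the operand really is a ConstantSDNode.
  uint64_t Old = cast<ConstantSDNode>(N->getOperand(Idx))->getZExtValue();
  uint64_t New = N->getConstantOperandVal(Idx);
  assert(Old == New);
  return New; // APInt variant: N->getConstantOperandAPInt(Idx)
}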
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 541a5b62450d..8fbc90a6db9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -682,7 +682,7 @@ static bool hasSourceMods(const SDNode *N) {
case ISD::BITCAST:
return false;
case ISD::INTRINSIC_WO_CHAIN: {
- switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
+ switch (N->getConstantOperandVal(0)) {
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_interp_mov:
@@ -837,7 +837,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
case ISD::TokenFactor:
return true;
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrID = N->getConstantOperandVal(0);
switch (IntrID) {
case Intrinsic::amdgcn_readfirstlane:
case Intrinsic::amdgcn_readlane:
@@ -1489,7 +1489,7 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SmallVector<SDValue, 8> Args;
- unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Start = Op.getConstantOperandVal(1);
EVT VT = Op.getValueType();
EVT SrcVT = Op.getOperand(0).getValueType();
@@ -2502,8 +2502,7 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) {
case ISD::FFREXP:
return true;
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Src.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Src.getConstantOperandVal(0);
switch (IntrinsicID) {
case Intrinsic::amdgcn_frexp_mant:
return true;
@@ -3601,7 +3600,7 @@ static SDValue simplifyMul24(SDNode *Node24,
SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1);
unsigned NewOpcode = Node24->getOpcode();
if (IsIntrin) {
- unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
+ unsigned IID = Node24->getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
NewOpcode = AMDGPUISD::MUL_I24;
@@ -3821,7 +3820,7 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
@@ -5652,7 +5651,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IID = Op.getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::amdgcn_workitem_id_y:
@@ -5834,8 +5833,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return SNaN;
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID
- = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cubeid:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index eaf72d7157ee..36e07d944c94 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -642,6 +642,7 @@ defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
+defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
defm int_amdgcn_global_atomic_fmin_num : noret_op;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c9412f720c62..fba060464a6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4690,6 +4690,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
+ case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index e83e644d13f3..2d8dc9d47225 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -11,7 +11,7 @@ def SGPRRegBank : RegisterBank<"SGPR",
>;
def VGPRRegBank : RegisterBank<"VGPR",
- [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024]
+ [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024]
>;
// It is helpful to distinguish conditions from ordinary SGPRs.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index db5d2bbcf5bb..fc47b02c98e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -346,8 +346,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsSGPR = true;
Width = 1;
} else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
- AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
- AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
+ AMDGPU::VGPR_16RegClass.contains(Reg)) {
IsSGPR = false;
Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
index 459400e3359c..79e9312034da 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
@@ -85,7 +85,6 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<UniformityInfoWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index beb670669581..4cc8871a00fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -243,6 +243,7 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
+def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 9bc3ba161c9e..1bfb7c0edd80 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -109,9 +109,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
// FIXME: preserve PostDominatorTreeWrapperPass
}
- // No divergent values are changed, only blocks and branch edges.
- AU.addPreserved<UniformityInfoWrapperPass>();
-
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3b69a37728ea..abd7e911beef 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5416,11 +5416,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
- if (IVersion.Major < 10)
- return Error(IDRange.Start, "directive requires gfx10+", IDRange);
+ if (IVersion.Major < 10 || IVersion.Major >= 12)
+ return Error(IDRange.Start, "directive requires gfx10 or gfx11",
+ IDRange);
SharedVGPRCount = Val;
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
- COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
+ COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
@@ -5522,7 +5523,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
(AccumOffset / 4 - 1));
}
- if (IVersion.Major >= 10) {
+ if (IVersion.Major >= 10 && IVersion.Major < 12) {
// SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
return TokError("shared_vgpr_count directive not valid on "
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 7939d0036568..67be7b0fd642 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1284,9 +1284,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
bool IsHi) const {
- unsigned RCID =
- IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
- return createRegOperand(RCID, RegIdx);
+ unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
+ return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}
// Decode Literals for insts which always have a literal in the encoding
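With the VGPR_LO16/VGPR_HI16 classes removed, the 16-bit halves are now decoded into the single VGPR_16 class, whose registers interleave lo and hi halves (see the SIRegisterInfo.td hunk further down). A minimal sketch of the index mapping used above (illustrative helper, not the in-tree code):

// VGPR_16 enumerates VGPR0_LO16, VGPR0_HI16, VGPR1_LO16, VGPR1_HI16, ...
// so the 16-bit half of 32-bit register RegIdx sits at 2*RegIdx (+1 for hi).
static unsigned vgpr16Index(unsigned RegIdx, bool IsHi) {
  return RegIdx * 2 + (IsHi ? 1 : 0);
}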
@@ -2000,34 +1999,60 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
return MCDisassembler::Fail;
} else if (isGFX10Plus()) {
- if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
- PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ // Bits [0-3].
+ if (!isGFX12Plus()) {
+ if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
+ PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
+ COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
+ } else {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "SHARED_VGPR_COUNT",
+ COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
+ }
} else {
- PRINT_PSEUDO_DIRECTIVE_COMMENT(
- "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0)
+ return MCDisassembler::Fail;
}
- if (isGFX11Plus()) {
+ // Bits [4-11].
+ if (isGFX11()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+ COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+ COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
+ } else if (isGFX12Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1)
+ return MCDisassembler::Fail;
+ }
+
+ // Bits [12].
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2)
+ return MCDisassembler::Fail;
+
+ // Bits [13].
+ if (isGFX12Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
+ COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
} else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3)
return MCDisassembler::Fail;
}
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
+ // Bits [14-30].
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4)
return MCDisassembler::Fail;
+ // Bits [31].
if (isGFX11Plus()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
} else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5)
return MCDisassembler::Fail;
}
} else if (FourByteBuffer) {
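The rsrc3 word is no longer decoded as a single gfx10+ layout: bits [0-3] hold the shared VGPR count only on gfx10/gfx11 and are reserved on gfx12, bits [4-11] carry INST_PREF_SIZE plus the trap bits on gfx11 but only INST_PREF_SIZE on gfx12, bit [13] becomes GLG_EN on gfx12, and bit [31] is IMAGE_OP from gfx11 on (the old code printed IMAGE_OP using the TRAP_ON_START mask). A minimal sketch of the gfx10/gfx11-only field, with the bit range taken from the comments in the hunk above rather than from AMDHSAKernelDescriptor.h:

#include <cstdint>

// Extract .amdhsa_shared_vgpr_count from compute_pgm_rsrc3; the field only
// exists on gfx10 and gfx11 (COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT).
static unsigned sharedVGPRCount(uint32_t Rsrc3, unsigned GfxMajor) {
  if (GfxMajor < 10 || GfxMajor >= 12)
    return 0;           // reserved (gfx12+) or absent (pre-gfx10)
  return Rsrc3 & 0xF;   // bits [0-3] per the decoder comments above
}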
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 0dd2b3f5c2c9..615f8cd54d8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -926,9 +926,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
-} // End is_flat_global = 1
-
+let SubtargetPredicate = isGFX12Plus in {
+ defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
+} // End SubtargetPredicate = isGFX12Plus
+} // End is_flat_global = 1
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
@@ -1529,6 +1531,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>;
+let OtherPredicates = [isGFX12Plus] in {
+ defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>;
+}
+
let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
@@ -2654,6 +2660,7 @@ defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_A
defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">;
defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">;
defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
+defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">;
// ENC_VSCRATCH.
defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index a855cf585205..e135a4e25dd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -475,8 +475,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ }
+ if (IVersion.Major >= 10 && IVersion.Major < 12) {
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
}
if (IVersion.Major >= 12)
PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index c1ba9c514874..9a2fb0bc37b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -424,8 +424,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return lowerADDRSPACECAST(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(1);
switch (IntrinsicID) {
case Intrinsic::r600_store_swizzle: {
SDLoc DL(Op);
@@ -449,8 +448,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
EVT VT = Op.getValueType();
SDLoc DL(Op);
switch (IntrinsicID) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fc119aa61d01..0e857e6ac71b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1240,6 +1240,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_fmax_num:
+ case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
@@ -5304,7 +5305,7 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
- VT == MVT::v32f32);
+ VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16);
SDValue Lo0, Hi0;
SDValue Op0 = Op.getOperand(0);
@@ -5388,7 +5389,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// Get the rounding mode from the last operand
- int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int RoundMode = Op.getConstantOperandVal(1);
if (RoundMode == (int)RoundingMode::TowardPositive)
Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
else if (RoundMode == (int)RoundingMode::TowardNegative)
@@ -5698,7 +5699,7 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_make_buffer_rsrc:
Results.push_back(lowerPointerAsRsrcIntrin(N, DAG));
@@ -5836,7 +5837,7 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
+ switch (Intr->getConstantOperandVal(1)) {
case Intrinsic::amdgcn_if:
return AMDGPUISD::IF;
case Intrinsic::amdgcn_else:
@@ -5985,7 +5986,7 @@ SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Checking the depth
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
+ if (Op.getConstantOperandVal(0) != 0)
return DAG.getConstant(0, DL, VT);
MachineFunction &MF = DAG.getMachineFunction();
@@ -7634,7 +7635,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
// TODO: Should this propagate fast-math-flags?
@@ -7788,7 +7789,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
- unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned CPol = Op.getConstantOperandVal(3);
if (CPol & ~((Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12)
? AMDGPU::CPol::ALL
: AMDGPU::CPol::ALL_pregfx12))
@@ -8038,7 +8039,7 @@ SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntrID = Op.getConstantOperandVal(1);
SDLoc DL(Op);
switch (IntrID) {
@@ -8134,8 +8135,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Glc = Op.getConstantOperandVal(5);
+ unsigned Slc = Op.getConstantOperandVal(6);
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -8223,10 +8224,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT LoadVT = Op.getValueType();
auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
- unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
- unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned Dfmt = Op.getConstantOperandVal(7);
+ unsigned Nfmt = Op.getConstantOperandVal(8);
+ unsigned Glc = Op.getConstantOperandVal(9);
+ unsigned Slc = Op.getConstantOperandVal(10);
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -8313,7 +8314,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_buffer_atomic_fadd: {
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Slc = Op.getConstantOperandVal(6);
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -8474,7 +8475,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Slc = Op.getConstantOperandVal(7);
unsigned IdxEn = getIdxEn(Op.getOperand(5));
SDValue Ops[] = {
Op.getOperand(0), // Chain
@@ -8878,7 +8879,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(1);
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
@@ -8943,10 +8944,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
- unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
- unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+ unsigned Dfmt = Op.getConstantOperandVal(8);
+ unsigned Nfmt = Op.getConstantOperandVal(9);
+ unsigned Glc = Op.getConstantOperandVal(10);
+ unsigned Slc = Op.getConstantOperandVal(11);
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Chain,
@@ -9029,8 +9030,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Glc = Op.getConstantOperandVal(6);
+ unsigned Slc = Op.getConstantOperandVal(7);
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Chain,
@@ -12069,8 +12070,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID
- = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cvt_pkrtz:
@@ -15008,7 +15008,7 @@ void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned Opc = Op.getOpcode();
switch (Opc) {
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IID = Op.getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_mbcnt_lo:
case Intrinsic::amdgcn_mbcnt_hi: {
@@ -15251,11 +15251,9 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
case ISD::CALLSEQ_END:
return true;
case ISD::INTRINSIC_WO_CHAIN:
- return AMDGPU::isIntrinsicSourceOfDivergence(
- cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(0));
case ISD::INTRINSIC_W_CHAIN:
- return AMDGPU::isIntrinsicSourceOfDivergence(
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(1));
case AMDGPUISD::ATOMIC_CMP_SWAP:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ebe23a5eac57..396d22c7ec18 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -273,8 +273,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
// subtract the index by one.
Offset0Idx -= get(Opc0).NumDefs;
Offset1Idx -= get(Opc1).NumDefs;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
+ Offset0 = Load0->getConstantOperandVal(Offset0Idx);
+ Offset1 = Load1->getConstantOperandVal(Offset1Idx);
return true;
}
@@ -955,12 +955,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
- bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
- AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
- AMDGPU::AGPR_LO16RegClass.contains(DestReg);
- bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
- AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
- AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
+ bool DstLow = !AMDGPU::isHi(DestReg, RI);
+ bool SrcLow = !AMDGPU::isHi(SrcReg, RI);
MCRegister NewDestReg = RI.get32BitRegister(DestReg);
MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
@@ -7202,6 +7198,18 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
Register DstReg = Inst.getOperand(0).getReg();
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
+ // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and
+ // hope for the best.
+ if (Inst.isCopy() && DstReg.isPhysical() &&
+ RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
+ // TODO: Only works for 32 bit registers.
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg())
+ .add(Inst.getOperand(1));
+ Inst.eraseFromParent();
+ return;
+ }
+
if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
// Instead of creating a copy where src and dst are the same register
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
index f9bc623abcd0..8310c6b57dad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1487,8 +1487,18 @@ foreach Index = 0-31 in {
// 16-bit bitcast
def : BitConvert <i16, f16, VGPR_32>;
def : BitConvert <f16, i16, VGPR_32>;
+def : BitConvert <f16, bf16, VGPR_32>;
+def : BitConvert <bf16, f16, VGPR_32>;
+
def : BitConvert <i16, f16, SReg_32>;
def : BitConvert <f16, i16, SReg_32>;
+def : BitConvert <f16, bf16, SReg_32>;
+def : BitConvert <bf16, f16, SReg_32>;
+
+def : BitConvert <i16, bf16, VGPR_32>;
+def : BitConvert <bf16, i16, VGPR_32>;
+def : BitConvert <i16, bf16, SReg_32>;
+def : BitConvert <bf16, i16, SReg_32>;
// 32-bit bitcast
def : BitConvert <i32, f32, VGPR_32>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 021d797344c5..a93cf5cad411 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -330,8 +330,10 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
RegPressureIgnoredUnits.resize(getNumRegUnits());
RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
- for (auto Reg : AMDGPU::VGPR_HI16RegClass)
- RegPressureIgnoredUnits.set(*regunits(Reg).begin());
+ for (auto Reg : AMDGPU::VGPR_16RegClass) {
+ if (AMDGPU::isHi(Reg, *this))
+ RegPressureIgnoredUnits.set(*regunits(Reg).begin());
+ }
// HACK: Until this is fully tablegen'd.
static llvm::once_flag InitializeRegSplitPartsFlag;
@@ -2661,7 +2663,7 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
if (BitWidth == 1)
return &AMDGPU::VReg_1RegClass;
if (BitWidth == 16)
- return &AMDGPU::VGPR_LO16RegClass;
+ return &AMDGPU::VGPR_16RegClass;
if (BitWidth == 32)
return &AMDGPU::VGPR_32RegClass;
return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
@@ -2808,8 +2810,6 @@ getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
const TargetRegisterClass *
SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
- if (BitWidth == 16)
- return &AMDGPU::VGPR_LO16RegClass;
if (BitWidth == 32)
return &AMDGPU::AV_32RegClass;
return ST.needsAlignedVGPRs()
@@ -3041,8 +3041,6 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
default:
return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
case AMDGPU::VGPR_32RegClassID:
- case AMDGPU::VGPR_LO16RegClassID:
- case AMDGPU::VGPR_HI16RegClassID:
return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::SGPR_LO16RegClassID:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 981da13fe089..c94b894c5841 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -376,7 +376,7 @@ def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
let HasSGPR = 1;
}
-def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
+def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> {
let CopyCost = 1;
let Size = 16;
let isAllocatable = 0;
@@ -385,7 +385,7 @@ def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
// TODO: Do we need to set DwarfRegAlias on register tuples?
-def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
(add (sequence "SGPR%u_LO16", 0, 105))> {
let AllocationPriority = 0;
let Size = 16;
@@ -393,7 +393,7 @@ def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
let HasSGPR = 1;
}
-def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
(add (sequence "SGPR%u_HI16", 0, 105))> {
let isAllocatable = 0;
let Size = 16;
@@ -402,7 +402,7 @@ def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
}
// SGPR 32-bit registers
-def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add (sequence "SGPR%u", 0, 105))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
@@ -451,14 +451,14 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s"
def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
// Trap handler TMP 32-bit registers
-def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
+def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v2bf16], 32,
(add (sequence "TTMP%u", 0, 15))> {
let isAllocatable = 0;
let HasSGPR = 1;
}
// Trap handler TMP 16-bit registers
-def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
(add (sequence "TTMP%u_LO16", 0, 15))> {
let Size = 16;
let isAllocatable = 0;
@@ -584,24 +584,10 @@ class RegisterTypes<list<ValueType> reg_types> {
list<ValueType> types = reg_types;
}
-def Reg16Types : RegisterTypes<[i16, f16]>;
-def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
+def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
+def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
let HasVGPR = 1 in {
-def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add (sequence "VGPR%u_LO16", 0, 255))> {
- let AllocationPriority = 0;
- let Size = 16;
- let GeneratePressureSet = 0;
-}
-
-def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add (sequence "VGPR%u_HI16", 0, 255))> {
- let AllocationPriority = 0;
- let Size = 16;
- let GeneratePressureSet = 0;
-}
-
// VOP3 and VINTERP can access 256 lo and 256 hi registers.
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (interleave (sequence "VGPR%u_LO16", 0, 255),
@@ -697,7 +683,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
}
// AccVGPR 32-bit registers
-def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 0;
let Size = 32;
@@ -749,7 +735,7 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
-def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add FP_REG, SP_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
@@ -757,7 +743,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
let BaseClassOrder = 10000;
}
-def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32,
(add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
@@ -774,7 +760,7 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
let GeneratePressureSet = 0, HasSGPR = 1 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
@@ -783,7 +769,7 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2
let AllocationPriority = 0;
}
-def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
(add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
@@ -796,17 +782,17 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
let BaseClassOrder = 16;
}
-def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SReg_32_XM0_XEXEC, M0_CLASS)> {
let AllocationPriority = 0;
}
-def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SReg_32_XEXEC, EXEC_LO)> {
let AllocationPriority = 0;
}
-def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 0;
}
@@ -814,7 +800,7 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i
} // End GeneratePressureSet = 0
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
(add SReg_32_XM0, M0_CLASS)> {
let AllocationPriority = 0;
let HasSGPR = 1;
@@ -822,13 +808,13 @@ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1],
}
let GeneratePressureSet = 0 in {
-def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
let HasSGPR = 1;
}
-def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32,
(add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 1;
@@ -850,13 +836,13 @@ def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
let HasSGPR = 1;
}
-def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
let HasSGPR = 1;
}
-def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
@@ -864,7 +850,7 @@ def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16
let HasSGPR = 1;
}
-def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 1;
@@ -919,11 +905,11 @@ multiclass SRegClass<int numRegs,
}
defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>;
defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
-defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
+defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs>;
defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
@@ -934,7 +920,7 @@ defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}
-def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
let HasVGPR = 1;
@@ -969,15 +955,15 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
}
}
-defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
+defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4],
(add VGPR_64)>;
defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
-defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
+defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add VGPR_128)>;
defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
-defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
+defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], (add VGPR_256)>;
defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>;
defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>;
defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
@@ -1007,7 +993,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
(add AGPR_64)>;
defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
-defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
+defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add AGPR_128)>;
defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
@@ -1046,14 +1032,14 @@ def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
let HasVGPR = 1;
}
-def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
}
-def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
let HasVGPR = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0f92a56237ac..a91d77175234 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2296,8 +2296,6 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
- case AMDGPU::VGPR_LO16RegClassID:
- case AMDGPU::VGPR_HI16RegClassID:
case AMDGPU::SGPR_LO16RegClassID:
case AMDGPU::AGPR_LO16RegClassID:
return 16;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index d3cefb339d9e..7f52501b5d90 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -190,9 +190,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
// because dealing with the write to high half of the register is
// difficult.
def : GCNPat <
- (build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
+ (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
(v2f16 (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
@@ -203,9 +203,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
def : GCNPat <
(build_vector
f16:$elt0,
- (AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
(v2f16 (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$src2_modifiers, $src2,
@@ -215,12 +215,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
def : GCNPat <
(AMDGPUclamp (build_vector
- (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
+ (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
- (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers))))),
+ (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
+ (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers))))))),
(v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
$hi_src1_modifiers, $hi_src1,
$hi_src2_modifiers, $hi_src2,
@@ -243,8 +243,8 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
>;
def : GCNPat <
- (build_vector f16:$elt0, (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
+ (build_vector f16:$elt0, (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers)))))),
(v2f16 (mixhi_inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
(i32 0), (i32 0),
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
index 5d9a366f5ed5..2265f5db6737 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -751,7 +751,7 @@ SDValue ARCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
- assert(cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0 &&
+ assert(Op.getConstantOperandVal(0) == 0 &&
"Only support lowering frame addr of current frame.");
Register FrameReg = ARI.getFrameRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a0776296b8eb..ef02dc997011 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4499,8 +4499,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
- unsigned ShOpVal =
- cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShOpVal = DefNode->getConstantOperandVal(2);
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
@@ -4512,8 +4511,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl only.
- unsigned ShAmt =
- cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShAmt = DefNode->getConstantOperandVal(2);
if (ShAmt == 0 || ShAmt == 2)
Latency = *Latency - 1;
break;
@@ -4526,8 +4524,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
- unsigned ShOpVal =
- cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShOpVal = DefNode->getConstantOperandVal(2);
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 984d8d3e0b08..adc429b61bbc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2422,8 +2422,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
- unsigned Lane =
- cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
@@ -2587,7 +2586,7 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
Ops.push_back(N->getOperand(2)); // vector of base addresses
- int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int32_t ImmValue = N->getConstantOperandVal(3);
Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
if (Predicated)
@@ -2622,7 +2621,7 @@ void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
// The shift count
if (Immediate) {
- int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int32_t ImmValue = N->getConstantOperandVal(3);
Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
} else {
Ops.push_back(N->getOperand(3));
@@ -2630,7 +2629,7 @@ void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
// The immediate saturation operand, if any
if (HasSaturationOperand) {
- int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+ int32_t SatOp = N->getConstantOperandVal(4);
int SatBit = (SatOp == 64 ? 0 : 1);
Ops.push_back(getI32Imm(SatBit, Loc));
}
@@ -2685,7 +2684,7 @@ void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
// and then an immediate shift count
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(2));
- int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int32_t ImmValue = N->getConstantOperandVal(3);
Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
if (Predicated)
@@ -4138,14 +4137,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (InGlue.getOpcode() == ARMISD::CMPZ) {
if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
SDValue Int = InGlue.getOperand(0);
- uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+ uint64_t ID = Int->getConstantOperandVal(1);
// Handle low-overhead loops.
if (ID == Intrinsic::loop_decrement_reg) {
SDValue Elements = Int.getOperand(2);
- SDValue Size = CurDAG->getTargetConstant(
- cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
- MVT::i32);
+ SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
+ dl, MVT::i32);
SDValue Args[] = { Elements, Size, Int.getOperand(0) };
SDNode *LoopDec =
@@ -4715,7 +4713,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default:
break;
@@ -4732,9 +4730,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
SmallVector<SDValue, 5> Ops;
- Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
- Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
- Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
+ Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
+ Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
+ Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
// The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
// instruction will always be '1111' but it is possible in assembly language to specify
@@ -5181,7 +5179,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
switch (IntNo) {
default:
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d00b7853816e..9f3bcffc7a99 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4110,7 +4110,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
@@ -4289,13 +4289,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
return Op.getOperand(0);
SDLoc dl(Op);
- unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
if (!isRead &&
(!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
// ARMv7 with MP extension has PLDW.
return Op.getOperand(0);
- unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned isData = Op.getConstantOperandVal(4);
if (Subtarget->isThumb()) {
// Invert the bits.
isRead = ~isRead & 1;
@@ -4800,7 +4800,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
!isSignedIntSetCC(CC)) {
- unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
+ unsigned Mask = LHS.getConstantOperandVal(1);
auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
uint64_t RHSV = RHSC->getZExtValue();
if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
@@ -4823,9 +4823,8 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isa<ConstantSDNode>(RHS) &&
cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
- unsigned ShiftAmt =
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
+ LHS.getConstantOperandVal(1) < 31) {
+ unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
DAG.getVTList(MVT::i32, MVT::i32),
LHS.getOperand(0),
@@ -6112,7 +6111,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
EVT VT = Op.getValueType();
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
@@ -6135,7 +6134,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
Register FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
@@ -8221,7 +8220,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ unsigned EltNo = V.getConstantOperandVal(1);
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
@@ -9034,7 +9033,7 @@ static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
SDValue Conv =
DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
- unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Lane = Op.getConstantOperandVal(2);
unsigned LaneWidth =
getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
@@ -9097,7 +9096,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
SDValue Conv =
DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
- unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Lane = Op.getConstantOperandVal(1);
unsigned LaneWidth =
getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
@@ -10682,7 +10681,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
unsigned Opc = 0;
if (IntNo == Intrinsic::arm_smlald)
Opc = ARMISD::SMLALD;
@@ -14842,14 +14841,14 @@ static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
assert(N->getOpcode() == ARMISD::BFI);
SDValue From = N->getOperand(1);
- ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+ ToMask = ~N->getConstantOperandAPInt(2);
FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
// If the Base came from a SHR #C, we can deduce that it is really testing bit
// #C in the base of the SHR.
if (From->getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(From->getOperand(1))) {
- APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+ APInt Shift = From->getConstantOperandAPInt(1);
assert(Shift.getLimitedValue() < 32 && "Shift too large!");
FromMask <<= Shift.getLimitedValue(31);
From = From->getOperand(0);
@@ -14908,7 +14907,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
- unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned InvMask = N->getConstantOperandVal(2);
unsigned LSB = llvm::countr_zero(~InvMask);
unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
assert(Width <
@@ -15448,8 +15447,7 @@ static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
- ARMCC::CondCodes Cond =
- (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
SDLoc dl(N);
// vcmp X, 0, cc -> vcmpz X, cc
@@ -15794,7 +15792,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
unsigned NewOpc = 0;
unsigned NumVecs = 0;
if (Target.isIntrinsic) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default:
llvm_unreachable("unexpected intrinsic for Neon base update");
@@ -16254,12 +16252,10 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
// For the stores, where there are multiple intrinsics we only actually want
// to post-inc the last of the them.
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- if (IntNo == Intrinsic::arm_mve_vst2q &&
- cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
+ unsigned IntNo = N->getConstantOperandVal(1);
+ if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
return SDValue();
- if (IntNo == Intrinsic::arm_mve_vst4q &&
- cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
+ if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
return SDValue();
// Search for a use of the address operand that is an increment.
@@ -16381,7 +16377,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return false;
unsigned NumVecs = 0;
unsigned NewOpc = 0;
- unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ unsigned IntNo = VLD->getConstantOperandVal(1);
if (IntNo == Intrinsic::arm_neon_vld2lane) {
NumVecs = 2;
NewOpc = ARMISD::VLD2DUP;
@@ -16397,8 +16393,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// First check that all the vldN-lane uses are VDUPLANEs and that the lane
// numbers match the load.
- unsigned VLDLaneNo =
- cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
// Ignore uses of the chain result.
@@ -16406,7 +16401,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
continue;
SDNode *User = *UI;
if (User->getOpcode() != ARMISD::VDUPLANE ||
- VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ VLDLaneNo != User->getConstantOperandVal(1))
return false;
}
@@ -16479,7 +16474,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
// Make sure the VMOV element size is not bigger than the VDUPLANE elements.
unsigned EltSize = Op.getScalarValueSizeInBits();
// The canonical VMOV for a zero vector uses a 32-bit element size.
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(0);
unsigned EltBits;
if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
EltSize = 8;
@@ -17479,7 +17474,7 @@ static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
switch (IntNo) {
default:
// Don't do anything for most intrinsics.
@@ -17669,7 +17664,7 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
case Intrinsic::arm_mve_addv: {
// Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
// which allow PerformADDVecReduce to turn it into VADDLV when possible.
- bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ bool Unsigned = N->getConstantOperandVal(2);
unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
}
@@ -17678,7 +17673,7 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
case Intrinsic::arm_mve_addlv_predicated: {
// Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
// which recombines the two outputs into an i64
- bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ bool Unsigned = N->getConstantOperandVal(2);
unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
(Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
(Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
@@ -18193,7 +18188,7 @@ static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
}
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
+ unsigned IntOp = N.getConstantOperandVal(1);
if (IntOp != Intrinsic::test_start_loop_iterations &&
IntOp != Intrinsic::loop_decrement_reg)
return SDValue();
@@ -18271,7 +18266,7 @@ static SDValue PerformHWLoopCombine(SDNode *N,
SDLoc dl(Int);
SelectionDAG &DAG = DCI.DAG;
SDValue Elements = Int.getOperand(2);
- unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+ unsigned IntOp = Int->getConstantOperandVal(1);
assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
&& "expected single br user");
SDNode *Br = *N->use_begin();
@@ -18305,8 +18300,8 @@ static SDValue PerformHWLoopCombine(SDNode *N,
DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
return Res;
} else {
- SDValue Size = DAG.getTargetConstant(
- cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
+ SDValue Size =
+ DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
SDValue Args[] = { Int.getOperand(0), Elements, Size, };
SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
DAG.getVTList(MVT::i32, MVT::Other), Args);
@@ -19051,7 +19046,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld1x2:
case Intrinsic::arm_neon_vld1x3:
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 196122e45ab8..e67a1e2ed509 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -335,7 +335,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) {
return false;
}
- int CST = (int)cast<ConstantSDNode>(BasePtr.getOperand(1))->getZExtValue();
+ int CST = (int)BasePtr.getConstantOperandVal(1);
SDValue Chain = ST->getChain();
EVT VT = ST->getValue().getValueType();
SDLoc DL(N);
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
index cd1dcfaea0eb..d36bfb188ed3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -298,8 +298,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
SDValue SrcHi =
DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
DAG.getConstant(1, dl, MVT::i16));
- uint64_t ShiftAmount =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t ShiftAmount = N->getConstantOperandVal(1);
if (ShiftAmount == 16) {
// Special case these two operations because they appear to be used by the
// generic codegen parts to lower 32-bit numbers.
@@ -367,7 +366,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
}
}
- uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t ShiftAmount = N->getConstantOperandVal(1);
SDValue Victim = N->getOperand(0);
switch (Op.getOpcode()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 909c7c005735..d8139958e9fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -193,7 +193,7 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
default:
break;
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
case Intrinsic::bpf_load_byte:
case Intrinsic::bpf_load_half:
@@ -469,7 +469,7 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN)
return;
- unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
+ unsigned IntNo = BaseV->getConstantOperandVal(1);
uint64_t MaskV = MaskN->getZExtValue();
if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index e3b4a2dc048a..90f70b83a02d 100644
--- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -1219,7 +1219,7 @@ SDValue CSKYTargetLowering::LowerFRAMEADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
Register FrameReg = RI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
@@ -1240,7 +1240,7 @@ SDValue CSKYTargetLowering::LowerRETURNADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index f930015026a5..eb5c59672224 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -192,7 +192,7 @@ MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
return nullptr;
SDLoc dl(IntN);
- unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ unsigned IntNo = IntN->getConstantOperandVal(1);
static std::map<unsigned,unsigned> LoadPciMap = {
{ Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
@@ -284,18 +284,18 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
// can provide an address of an unsigned variable to store the result of
// a sign-extending intrinsic into (or the other way around).
ISD::LoadExtType IntExt;
- switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
- case Intrinsic::hexagon_circ_ldub:
- case Intrinsic::hexagon_circ_lduh:
- IntExt = ISD::ZEXTLOAD;
- break;
- case Intrinsic::hexagon_circ_ldw:
- case Intrinsic::hexagon_circ_ldd:
- IntExt = ISD::NON_EXTLOAD;
- break;
- default:
- IntExt = ISD::SEXTLOAD;
- break;
+ switch (C->getConstantOperandVal(1)) {
+ case Intrinsic::hexagon_circ_ldub:
+ case Intrinsic::hexagon_circ_lduh:
+ IntExt = ISD::ZEXTLOAD;
+ break;
+ case Intrinsic::hexagon_circ_ldw:
+ case Intrinsic::hexagon_circ_ldd:
+ IntExt = ISD::NON_EXTLOAD;
+ break;
+ default:
+ IntExt = ISD::SEXTLOAD;
+ break;
}
if (N->getExtensionType() != IntExt)
return false;
@@ -325,7 +325,7 @@ bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
return false;
const SDLoc &dl(IntN);
- unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ unsigned IntNo = IntN->getConstantOperandVal(1);
static const std::map<unsigned, unsigned> LoadBrevMap = {
{ Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
@@ -366,7 +366,7 @@ bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
return false;
SDLoc DL(IntN);
- unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ unsigned IntNo = IntN->getConstantOperandVal(1);
SmallVector<SDValue, 7> Ops;
static std::map<unsigned,unsigned> LoadNPcMap = {
@@ -641,7 +641,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
if (SelectNewCircIntrinsic(N))
return;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
IntNo == Intrinsic::hexagon_V6_vgathermh ||
@@ -665,7 +665,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
}
void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
unsigned Bits;
switch (IID) {
case Intrinsic::hexagon_S2_vsplatrb:
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index e08566718d7c..fb156f2583e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -2895,7 +2895,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
unsigned Opcode;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default:
llvm_unreachable("Unexpected HVX gather intrinsic.");
@@ -2934,7 +2934,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
unsigned Opcode;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
default:
llvm_unreachable("Unexpected HVX gather intrinsic.");
@@ -2963,7 +2963,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
}
void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
SDNode *Result;
switch (IID) {
case Intrinsic::hexagon_V6_vaddcarry: {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index a7d452e7227d..51138091f4a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -669,8 +669,7 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
--NumOps; // Ignore the flag operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- const InlineAsm::Flag Flags(
- cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue());
+ const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
unsigned NumVals = Flags.getNumOperandRegisters();
++i; // Skip the ID value.
@@ -729,7 +728,7 @@ SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(1);
// Lower the hexagon_prefetch builtin to DCFETCH, as above.
if (IntNo == Intrinsic::hexagon_prefetch) {
SDValue Addr = Op.getOperand(2);
@@ -1176,7 +1175,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
@@ -1198,7 +1197,7 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
HRI.getFrameRegister(), VT);
while (Depth--)
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index db416a500f59..665e2d79c83d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -2127,7 +2127,7 @@ HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
SmallVector<SDValue> Ops(Op->ops().begin(), Op->ops().end());
auto Swap = [&](SDValue P) {
@@ -2922,7 +2922,7 @@ SDValue
HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
Op.getOpcode() == HexagonISD::TL_TRUNCATE);
- unsigned Opc = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Opc = Op.getConstantOperandVal(2);
return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index cbb5c2b998e2..17d7ffb586f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1057,7 +1057,7 @@ SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const unsigned Offset = -4;
@@ -1080,7 +1080,7 @@ SDValue LanaiTargetLowering::LowerFRAMEADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Lanai::FP, VT);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
while (Depth--) {
const unsigned Offset = -8;
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
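
Several of the targets touched here (ARM, CSKY, Hexagon, Lanai, LoongArch) share the same RETURNADDR/FRAMEADDR lowering shape: read the constant depth operand, then loop `while (Depth--)` loading the previous frame pointer. At the source level these nodes come from the frame-address/return-address builtins; a small standalone sketch (GCC/Clang builtins, depth 0 only, not target code) of what that lowered code ultimately serves:

// Standalone sketch: these builtins become the @llvm.frameaddress /
// @llvm.returnaddress intrinsics, i.e. the ISD::FRAMEADDR / ISD::RETURNADDR
// nodes handled by the lowerings above. The argument must be a constant;
// depth 0 is the only portable case, deeper walks need frame pointers.
#include <cstdio>

void dump_frame_info() {
  void *frame = __builtin_frame_address(0);   // current frame pointer
  void *ret   = __builtin_return_address(0);  // caller's return address
  std::printf("frame=%p return=%p\n", frame, ret);
}

int main() { dump_frame_info(); return 0; }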
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 276374afee38..66a37fce5dda 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -85,7 +85,7 @@ class LoongArchAsmParser : public MCTargetAsmParser {
// "emitLoadAddress*" functions.
void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
const MCExpr *Symbol, SmallVectorImpl<Inst> &Insts,
- SMLoc IDLoc, MCStreamer &Out);
+ SMLoc IDLoc, MCStreamer &Out, bool RelaxHint = false);
// Helper to emit pseudo instruction "la.abs $rd, sym".
void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
@@ -748,12 +748,14 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info,
void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
const MCExpr *Symbol,
SmallVectorImpl<Inst> &Insts,
- SMLoc IDLoc, MCStreamer &Out) {
+ SMLoc IDLoc, MCStreamer &Out,
+ bool RelaxHint) {
MCContext &Ctx = getContext();
for (LoongArchAsmParser::Inst &Inst : Insts) {
unsigned Opc = Inst.Opc;
LoongArchMCExpr::VariantKind VK = Inst.VK;
- const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx);
+ const LoongArchMCExpr *LE =
+ LoongArchMCExpr::create(Symbol, VK, Ctx, RelaxHint);
switch (Opc) {
default:
llvm_unreachable("unexpected opcode");
@@ -854,7 +856,7 @@ void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc,
Insts.push_back(
LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12));
- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true);
}
void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc,
@@ -900,7 +902,7 @@ void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc,
Insts.push_back(
LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12));
- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true);
}
void LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc,
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4794a131edae..e14bbadf9ed2 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UNDEF, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Legal);
@@ -406,6 +406,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerWRITE_REGISTER(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@@ -514,6 +516,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
}
SDValue
+LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VecTy = Op->getOperand(0)->getValueType(0);
+ SDValue Idx = Op->getOperand(1);
+ EVT EltTy = VecTy.getVectorElementType();
+ unsigned NumElts = VecTy.getVectorNumElements();
+
+ if (isa<ConstantSDNode>(Idx) &&
+ (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
+ EltTy == MVT::f64 ||
+ cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2))
+ return Op;
+
+ return SDValue();
+}
+
+SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
if (isa<ConstantSDNode>(Op->getOperand(2)))
@@ -569,7 +588,7 @@ SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
int GRLenInBytes = Subtarget.getGRLen() / 8;
while (Depth--) {
@@ -588,7 +607,7 @@ SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
return SDValue();
// Currently only support lowering return address for current frame.
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+ if (Op.getConstantOperandVal(0) != 0) {
DAG.getContext()->emitError(
"return address can only be determined for the current frame");
return SDValue();
@@ -1244,7 +1263,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
case Intrinsic::loongarch_csrrd_w:
case Intrinsic::loongarch_csrrd_d: {
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(2);
return !isUInt<14>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
@@ -1252,7 +1271,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::loongarch_csrwr_w:
case Intrinsic::loongarch_csrwr_d: {
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(3);
return !isUInt<14>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
@@ -1261,7 +1280,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::loongarch_csrxchg_w:
case Intrinsic::loongarch_csrxchg_d: {
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(4);
return !isUInt<14>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
@@ -1287,7 +1306,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
{Chain, Op.getOperand(2)});
}
case Intrinsic::loongarch_lddir_d: {
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(3);
return !isUInt<8>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: Op;
@@ -1295,7 +1314,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::loongarch_movfcsr2gr: {
if (!Subtarget.hasBasicF())
return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(2);
return !isUInt<2>(Imm)
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
: DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
@@ -1441,7 +1460,7 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
case Intrinsic::loongarch_ldpte_d: {
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(3);
return !Subtarget.is64Bit()
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
: !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
@@ -1454,53 +1473,53 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_b:
return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<5>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_b:
return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<4>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_h:
return (!isShiftedInt<8, 1>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<4>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 2", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_h:
return (!isShiftedInt<8, 1>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<3>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 2", DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_w:
return (!isShiftedInt<8, 2>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<3>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 4", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_w:
return (!isShiftedInt<8, 2>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<2>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 4", DAG)
: SDValue();
case Intrinsic::loongarch_lasx_xvstelm_d:
return (!isShiftedInt<8, 3>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<2>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 8", DAG)
: SDValue();
case Intrinsic::loongarch_lsx_vstelm_d:
return (!isShiftedInt<8, 3>(
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
- !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+ !isUInt<1>(Op.getConstantOperandVal(5)))
? emitIntrinsicErrorMessage(
Op, "argument out of range or not a multiple of 8", DAG)
: SDValue();
@@ -1673,7 +1692,7 @@ replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
unsigned ResOp) {
const StringRef ErrorMsgOOR = "argument out of range";
- unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
+ unsigned Imm = Node->getConstantOperandVal(2);
if (!isUInt<N>(Imm)) {
emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
/*WithChain=*/false);
@@ -1976,7 +1995,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
break;
}
case Intrinsic::loongarch_csrwr_w: {
- unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ unsigned Imm = N->getConstantOperandVal(3);
if (!isUInt<14>(Imm)) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
@@ -1991,7 +2010,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
break;
}
case Intrinsic::loongarch_csrxchg_w: {
- unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+ unsigned Imm = N->getConstantOperandVal(4);
if (!isUInt<14>(Imm)) {
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 2d73a7394946..6f8878f9ccd5 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -279,6 +279,7 @@ private:
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
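
The EXTRACT_VECTOR_ELT action flips from Legal to Custom so that the new lowerEXTRACT_VECTOR_ELT can decide per node: keep the constant-index cases that the XVPICKVE2GR patterns below can select, and let everything else fall back to default expansion. A compressed sketch of that contract (a generic SelectionDAG idiom, simplified from the real condition in the .cpp above, which additionally checks the element type and, for 8/16-bit elements, that the index addresses the low half of the vector):

// Hedged sketch of the Custom-lowering contract only; compiles only against
// LLVM headers, and the condition is deliberately simplified.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue lowerExtractEltSketch(SDValue Op, SelectionDAG &DAG) {
  if (isa<ConstantSDNode>(Op.getOperand(1)))
    return Op;       // returning the node unchanged keeps it for isel patterns
  return SDValue();  // empty result: the legalizer falls back to expansion
}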
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ec6983d0f487..b3c11bc5423d 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
(XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
-// XVREPL128VEI_{W/D}
+// XVREPLVE0_{W/D}
def : Pat<(lasxsplatf32 FPR32:$fj),
- (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+ (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;
def : Pat<(lasxsplatf64 FPR64:$fj),
- (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+ (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;
// Loads/Stores
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
@@ -1590,42 +1590,18 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
(VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
-def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)),
- (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>;
-def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)),
- (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>;
-def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)),
- (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>;
-def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)),
- (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>;
-
-// Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj,
- i64:$rk),
- sub_32)),
- GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj,
- i64:$rk),
- sub_32)),
- GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)),
- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk),
- sub_32)),
- GPR)>;
-def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)),
- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk),
- sub_64)),
- GPR)>;
-def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
- (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>;
-def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
- (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
+def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
+ (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
+ (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
+ (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
+ (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;
// vselect
-def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd,
- (v32i8 (SplatPat_uimm8 uimm8:$imm)))),
+def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)),
+ LASX256:$xj)),
(XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>;
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)),
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index e468176885d7..5569c2cd15b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
(f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
// vselect
-def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd,
- (v16i8 (SplatPat_uimm8 uimm8:$imm)))),
+def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)),
+ LSX128:$vj)),
(VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>;
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)),
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 45169becca37..d2ea062dc09a 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/EndianStream.h"
@@ -120,12 +121,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
assert(MO.isExpr() && "getExprOpValue expects only expressions");
+ bool RelaxCandidate = false;
+ bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax);
const MCExpr *Expr = MO.getExpr();
MCExpr::ExprKind Kind = Expr->getKind();
LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid;
if (Kind == MCExpr::Target) {
const LoongArchMCExpr *LAExpr = cast<LoongArchMCExpr>(Expr);
+ RelaxCandidate = LAExpr->getRelaxHint();
switch (LAExpr->getKind()) {
case LoongArchMCExpr::VK_LoongArch_None:
case LoongArchMCExpr::VK_LoongArch_Invalid:
@@ -270,6 +274,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
Fixups.push_back(
MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc()));
+
+ // Emit an R_LARCH_RELAX if linker relaxation is enabled and LAExpr has relax
+ // hint.
+ if (EnableRelax && RelaxCandidate) {
+ const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+ Fixups.push_back(MCFixup::create(
+ 0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc()));
+ }
+
return 0;
}
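
Together with the asm-parser change above, this closes the loop for linker relaxation: the la.pcrel/la.got expansions now create their LoongArchMCExpr operands with a relax hint, and the code emitter appends an extra fixup_loongarch_relax (emitted as R_LARCH_RELAX, per the comment in the patch) next to the normal fixup whenever the relax feature is enabled and the expression carries the hint. A condensed sketch of the two ends of that handshake, using only names that appear in this patch; it is LoongArch target-internal code, so it only compiles inside the target:

// Producer side (asm parser): mark the expression as a relaxation candidate.
const LoongArchMCExpr *LE =
    LoongArchMCExpr::create(Symbol, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12,
                            Ctx, /*Hint=*/true);

// Consumer side (code emitter): pair the real fixup with a relax marker when
// both the subtarget feature and the hint are present.
if (STI.hasFeature(LoongArch::FeatureRelax) && LE->getRelaxHint())
  Fixups.push_back(MCFixup::create(
      0, MCConstantExpr::create(0, Ctx),
      MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc()));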
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
index 993111552a31..82c992b1cc8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
@@ -25,9 +25,10 @@ using namespace llvm;
#define DEBUG_TYPE "loongarch-mcexpr"
-const LoongArchMCExpr *
-LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) {
- return new (Ctx) LoongArchMCExpr(Expr, Kind);
+const LoongArchMCExpr *LoongArchMCExpr::create(const MCExpr *Expr,
+ VariantKind Kind, MCContext &Ctx,
+ bool Hint) {
+ return new (Ctx) LoongArchMCExpr(Expr, Kind, Hint);
}
void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
index 0945cf82db86..93251f824103 100644
--- a/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
@@ -67,16 +67,18 @@ public:
private:
const MCExpr *Expr;
const VariantKind Kind;
+ const bool RelaxHint;
- explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind)
- : Expr(Expr), Kind(Kind) {}
+ explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind, bool Hint)
+ : Expr(Expr), Kind(Kind), RelaxHint(Hint) {}
public:
static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind,
- MCContext &Ctx);
+ MCContext &Ctx, bool Hint = false);
VariantKind getKind() const { return Kind; }
const MCExpr *getSubExpr() const { return Expr; }
+ bool getRelaxHint() const { return RelaxHint; }
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index 7bd382107773..7fcc65beaa65 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -161,6 +161,16 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV16rf), MVT::i32,
MVT::i16);
+ case M68k::MOVSXd16q8:
+ return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV8dq), MVT::i16,
+ MVT::i8);
+ case M68k::MOVSXd32q8:
+ return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV8dq), MVT::i32,
+ MVT::i8);
+ case M68k::MOVSXd32q16:
+ return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV16dq), MVT::i32,
+ MVT::i16);
+
case M68k::MOVZXd16q8:
return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV8dq), MVT::i16,
MVT::i8);
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 0830cc7feb22..c4d7a0dec7f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -94,11 +94,10 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
setOperationAction(OP, MVT::i16, Expand);
}
- // FIXME It would be better to use a custom lowering
for (auto OP : {ISD::SMULO, ISD::UMULO}) {
- setOperationAction(OP, MVT::i8, Expand);
- setOperationAction(OP, MVT::i16, Expand);
- setOperationAction(OP, MVT::i32, Expand);
+ setOperationAction(OP, MVT::i8, Custom);
+ setOperationAction(OP, MVT::i16, Custom);
+ setOperationAction(OP, MVT::i32, Custom);
}
for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS})
@@ -1533,46 +1532,119 @@ bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return VT.bitsLE(MVT::i32) || Subtarget.atLeastM68020();
}
-SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
- // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
- // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
- // looks for this combo and may remove the "setcc" instruction if the "setcc"
- // has only one use.
+static bool isOverflowArithmetic(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void lowerOverflowArithmetic(SDValue Op, SelectionDAG &DAG,
+ SDValue &Result, SDValue &CCR,
+ unsigned &CC) {
SDNode *N = Op.getNode();
+ EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- unsigned BaseOp = 0;
- unsigned Cond = 0;
SDLoc DL(Op);
+
+ unsigned TruncOp = 0;
+ auto PromoteMULO = [&](unsigned ExtOp) {
+ // We don't have 8-bit multiplications, so promote i8 version of U/SMULO
+ // to i16.
+ // Ideally this should be done by legalizer but sadly there is no promotion
+ // rule for U/SMULO at this moment.
+ if (VT == MVT::i8) {
+ LHS = DAG.getNode(ExtOp, DL, MVT::i16, LHS);
+ RHS = DAG.getNode(ExtOp, DL, MVT::i16, RHS);
+ VT = MVT::i16;
+ TruncOp = ISD::TRUNCATE;
+ }
+ };
+
+ bool NoOverflow = false;
+ unsigned BaseOp = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
BaseOp = M68kISD::ADD;
- Cond = M68k::COND_VS;
+ CC = M68k::COND_VS;
break;
case ISD::UADDO:
BaseOp = M68kISD::ADD;
- Cond = M68k::COND_CS;
+ CC = M68k::COND_CS;
break;
case ISD::SSUBO:
BaseOp = M68kISD::SUB;
- Cond = M68k::COND_VS;
+ CC = M68k::COND_VS;
break;
case ISD::USUBO:
BaseOp = M68kISD::SUB;
- Cond = M68k::COND_CS;
+ CC = M68k::COND_CS;
+ break;
+ case ISD::UMULO:
+ PromoteMULO(ISD::ZERO_EXTEND);
+ NoOverflow = VT != MVT::i32;
+ BaseOp = NoOverflow ? ISD::MUL : M68kISD::UMUL;
+ CC = M68k::COND_VS;
+ break;
+ case ISD::SMULO:
+ PromoteMULO(ISD::SIGN_EXTEND);
+ NoOverflow = VT != MVT::i32;
+ BaseOp = NoOverflow ? ISD::MUL : M68kISD::SMUL;
+ CC = M68k::COND_VS;
break;
}
- // Also sets CCR.
- SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i8);
+ SDVTList VTs;
+ if (NoOverflow)
+ VTs = DAG.getVTList(VT);
+ else
+ // Also sets CCR.
+ VTs = DAG.getVTList(VT, MVT::i8);
+
SDValue Arith = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
- SDValue SetCC = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
- DAG.getConstant(Cond, DL, MVT::i8),
- SDValue(Arith.getNode(), 1));
+ Result = Arith.getValue(0);
+ if (TruncOp)
+ // Right now the only place to truncate is from i16 to i8.
+ Result = DAG.getNode(TruncOp, DL, MVT::i8, Arith);
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, SetCC);
+ if (NoOverflow)
+ CCR = DAG.getConstant(0, DL, N->getValueType(1));
+ else
+ CCR = Arith.getValue(1);
+}
+
+SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDLoc DL(Op);
+
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag.
+ SDValue Result, CCR;
+ unsigned CC;
+ lowerOverflowArithmetic(Op, DAG, Result, CCR, CC);
+
+ SDValue Overflow;
+ if (isa<ConstantSDNode>(CCR)) {
+ // It's likely a result of operations that will not overflow
+ // hence no setcc is needed.
+ Overflow = CCR;
+ } else {
+ // Generate a M68kISD::SETCC.
+ Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(CC, DL, MVT::i8), CCR);
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Overflow);
}
/// Create a BTST (Bit Test) node - Test bit \p BitNo in \p Src and set
@@ -2206,8 +2278,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
isNullConstant(Cond.getOperand(1).getOperand(0))) {
SDValue Cmp = Cond.getOperand(1);
- unsigned CondCode =
- cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+ unsigned CondCode = Cond.getConstantOperandVal(0);
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == M68k::COND_EQ || CondCode == M68k::COND_NE)) {
@@ -2269,55 +2340,12 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = Cmp;
addTest = false;
}
- } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
- CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
- CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned MxOpcode;
- unsigned MxCond;
- SDVTList VTs;
- switch (CondOpcode) {
- case ISD::UADDO:
- MxOpcode = M68kISD::ADD;
- MxCond = M68k::COND_CS;
- break;
- case ISD::SADDO:
- MxOpcode = M68kISD::ADD;
- MxCond = M68k::COND_VS;
- break;
- case ISD::USUBO:
- MxOpcode = M68kISD::SUB;
- MxCond = M68k::COND_CS;
- break;
- case ISD::SSUBO:
- MxOpcode = M68kISD::SUB;
- MxCond = M68k::COND_VS;
- break;
- case ISD::UMULO:
- MxOpcode = M68kISD::UMUL;
- MxCond = M68k::COND_VS;
- break;
- case ISD::SMULO:
- MxOpcode = M68kISD::SMUL;
- MxCond = M68k::COND_VS;
- break;
- default:
- llvm_unreachable("unexpected overflowing operator");
- }
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS);
-
- if (CondOpcode == ISD::UMULO)
- Cond = MxOp.getValue(2);
- else
- Cond = MxOp.getValue(1);
-
- CC = DAG.getConstant(MxCond, DL, MVT::i8);
+ } else if (isOverflowArithmetic(CondOpcode)) {
+ // Result is unused here.
+ SDValue Result;
+ unsigned CCode;
+ lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode);
+ CC = DAG.getConstant(CCode, DL, MVT::i8);
addTest = false;
}
@@ -2377,6 +2405,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Simple optimization when Cond is a constant to avoid generating
+ // M68kISD::CMOV if possible.
+ // TODO: Generalize this to use SelectionDAG::computeKnownBits.
+ if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) {
+ const APInt &C = Const->getAPIntValue();
+ if (C.countr_zero() >= 5)
+ return Op2;
+ else if (C.countr_one() >= 5)
+ return Op1;
+ }
+
// M68kISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
@@ -2466,61 +2505,15 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
}
CondOpcode = Cond.getOpcode();
- if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
- CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned MxOpcode;
- unsigned MxCond;
- SDVTList VTs;
- // Keep this in sync with LowerXALUO, otherwise we might create redundant
- // instructions that can't be removed afterwards (i.e. M68kISD::ADD and
- // M68kISD::INC).
- switch (CondOpcode) {
- case ISD::UADDO:
- MxOpcode = M68kISD::ADD;
- MxCond = M68k::COND_CS;
- break;
- case ISD::SADDO:
- MxOpcode = M68kISD::ADD;
- MxCond = M68k::COND_VS;
- break;
- case ISD::USUBO:
- MxOpcode = M68kISD::SUB;
- MxCond = M68k::COND_CS;
- break;
- case ISD::SSUBO:
- MxOpcode = M68kISD::SUB;
- MxCond = M68k::COND_VS;
- break;
- case ISD::UMULO:
- MxOpcode = M68kISD::UMUL;
- MxCond = M68k::COND_VS;
- break;
- case ISD::SMULO:
- MxOpcode = M68kISD::SMUL;
- MxCond = M68k::COND_VS;
- break;
- default:
- llvm_unreachable("unexpected overflowing operator");
- }
+ if (isOverflowArithmetic(CondOpcode)) {
+ SDValue Result;
+ unsigned CCode;
+ lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode);
if (Inverted)
- MxCond = M68k::GetOppositeBranchCondition((M68k::CondCode)MxCond);
+ CCode = M68k::GetOppositeBranchCondition((M68k::CondCode)CCode);
+ CC = DAG.getConstant(CCode, DL, MVT::i8);
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i8);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i8);
-
- SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS);
-
- if (CondOpcode == ISD::UMULO)
- Cond = MxOp.getValue(2);
- else
- Cond = MxOp.getValue(1);
-
- CC = DAG.getConstant(MxCond, DL, MVT::i8);
AddTest = false;
} else {
unsigned CondOpc;
@@ -3394,7 +3387,7 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Align = Op.getConstantOperandVal(2);
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
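
The interesting part of the M68k rework is the shared lowerOverflowArithmetic helper and the promotion of i8 U/SMULO to i16, since, as the added comment says, the ISA has no 8-bit multiply. The promotion is safe because an i8 by i8 product always fits in i16. As a standalone illustration of that fact, and of how an overflow decision looks once the wide product is exact, here is a minimal sketch in plain C++ (not the M68k lowering itself, which works on SelectionDAG nodes):

// Plain C++ sketch, independent of LLVM: |i8 * i8| <= 128*128 = 16384, so the
// widened multiply is exact; overflow of the 8-bit operation is then just
// "does the result still fit back into 8 bits?".
#include <cassert>
#include <cstdint>

static bool smulo_i8(int8_t a, int8_t b, int8_t &out) {
  int16_t wide = static_cast<int16_t>(a) * static_cast<int16_t>(b);
  out = static_cast<int8_t>(wide);          // truncate (two's-complement wrap)
  return wide != static_cast<int16_t>(out); // lost bits => signed overflow
}

int main() {
  int8_t r;
  assert(!smulo_i8(12, 10, r) && r == 120); // fits in i8, no overflow
  assert(smulo_i8(16, 8, r));               // 128 does not fit in signed i8
  return 0;
}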
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h
index 02427a4e749e..d00907775f92 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -194,6 +194,15 @@ private:
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG &DAG) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override {
+ // In many cases, `GA` doesn't give the correct offset to fold. It's
+ // hard to know if the real offset actually fits into the displacement
+ // of the perspective addressing mode.
+ // Thus, we disable offset folding altogether and leave that to ISel
+ // patterns.
+ return false;
+ }
+
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
/// Emit a load of return address if tail call
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index 15d2049f62cb..3532e56e7417 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -590,8 +590,9 @@ class MxDiMuOp_DD<string MN, bits<4> CMD, bit SIGNED = false,
}
// $dreg <- $dreg op $dreg
-class MxDiMuOp_DD_Long<string MN, bits<10> CMD, bit SIGNED = false>
- : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", []> {
+class MxDiMuOp_DD_Long<string MN, SDNode NODE, bits<10> CMD, bit SIGNED = false>
+ : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst",
+ [(set i32:$dst, CCR, (NODE i32:$src, i32:$opd))]> {
let Inst = (ascend
(descend CMD,
/*MODE*/0b000, /*REGISTER*/(operand "$opd", 3)),
@@ -622,11 +623,9 @@ class MxDiMuOp_DI<string MN, bits<4> CMD, bit SIGNED = false,
} // let Constraints
} // Defs = [CCR]
-multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> {
- let isCommutable = isComm in {
- def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>;
- def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>;
- }
+multiclass MxDiMuOp<string MN, bits<4> CMD> {
+ def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>;
+ def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>;
def "S"#NAME#"d32i16" : MxDiMuOp_DI<MN#"s", CMD, /*SIGNED*/true, MxDRD32, Mxi16imm>;
def "U"#NAME#"d32i16" : MxDiMuOp_DI<MN#"u", CMD, /*SIGNED*/false, MxDRD32, Mxi16imm>;
@@ -634,8 +633,8 @@ multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> {
defm DIV : MxDiMuOp<"div", 0x8>;
-def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", 0x131, /*SIGNED*/true>;
-def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", 0x131, /*SIGNED*/false>;
+def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", sdiv, 0x131, /*SIGNED*/true>;
+def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", udiv, 0x131, /*SIGNED*/false>;
// This is used to cast immediates to 16-bits for operations which don't
// support smaller immediate sizes.
@@ -685,60 +684,53 @@ def : Pat<(urem i16:$dst, i16:$opd),
(LSR32di (LSR32di (UDIVd32d16 (MOVZXd32d16 $dst), $opd), 8), 8),
MxSubRegIndex16Lo)>;
-
-// RR i32
-def : Pat<(sdiv i32:$dst, i32:$opd), (SDIVd32d32 $dst, $opd)>;
-
-def : Pat<(udiv i32:$dst, i32:$opd), (UDIVd32d32 $dst, $opd)>;
-
-
// RI i8
-def : Pat<(sdiv i8:$dst, MximmSExt8:$opd),
+def : Pat<(sdiv i8:$dst, Mxi8immSExt8:$opd),
(EXTRACT_SUBREG
(SDIVd32i16 (MOVSXd32d8 $dst), (as_i16imm $opd)),
MxSubRegIndex8Lo)>;
-def : Pat<(udiv i8:$dst, MximmSExt8:$opd),
+def : Pat<(udiv i8:$dst, Mxi8immSExt8:$opd),
(EXTRACT_SUBREG
(UDIVd32i16 (MOVZXd32d8 $dst), (as_i16imm $opd)),
MxSubRegIndex8Lo)>;
-def : Pat<(srem i8:$dst, MximmSExt8:$opd),
+def : Pat<(srem i8:$dst, Mxi8immSExt8:$opd),
(EXTRACT_SUBREG
(ASR32di (ASR32di (SDIVd32i16 (MOVSXd32d8 $dst), (as_i16imm $opd)), 8), 8),
MxSubRegIndex8Lo)>;
-def : Pat<(urem i8:$dst, MximmSExt8:$opd),
+def : Pat<(urem i8:$dst, Mxi8immSExt8:$opd),
(EXTRACT_SUBREG
(LSR32di (LSR32di (UDIVd32i16 (MOVZXd32d8 $dst), (as_i16imm $opd)), 8), 8),
MxSubRegIndex8Lo)>;
// RI i16
-def : Pat<(sdiv i16:$dst, MximmSExt16:$opd),
+def : Pat<(sdiv i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(SDIVd32i16 (MOVSXd32d16 $dst), imm:$opd),
MxSubRegIndex16Lo)>;
-def : Pat<(udiv i16:$dst, MximmSExt16:$opd),
+def : Pat<(udiv i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(UDIVd32i16 (MOVZXd32d16 $dst), imm:$opd),
MxSubRegIndex16Lo)>;
-def : Pat<(srem i16:$dst, MximmSExt16:$opd),
+def : Pat<(srem i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(ASR32di (ASR32di (SDIVd32i16 (MOVSXd32d16 $dst), imm:$opd), 8), 8),
MxSubRegIndex16Lo)>;
-def : Pat<(urem i16:$dst, MximmSExt16:$opd),
+def : Pat<(urem i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(LSR32di (LSR32di (UDIVd32i16 (MOVZXd32d16 $dst), imm:$opd), 8), 8),
MxSubRegIndex16Lo)>;
-defm MUL : MxDiMuOp<"mul", 0xC, 1>;
+defm MUL : MxDiMuOp<"mul", 0xC>;
-def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", 0x130, /*SIGNED*/true>;
-def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", 0x130, /*SIGNED*/false>;
+def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", MxSMul, 0x130, /*SIGNED*/true>;
+def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", MxUMul, 0x130, /*SIGNED*/false>;
// RR
def : Pat<(mul i16:$dst, i16:$opd),
@@ -760,17 +752,17 @@ def : Pat<(mul i32:$dst, i32:$opd), (SMULd32d32 $dst, $opd)>;
// RI
-def : Pat<(mul i16:$dst, MximmSExt16:$opd),
+def : Pat<(mul i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(SMULd32i16 (MOVXd32d16 $dst), imm:$opd),
MxSubRegIndex16Lo)>;
-def : Pat<(mulhs i16:$dst, MximmSExt16:$opd),
+def : Pat<(mulhs i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(ASR32di (ASR32di (SMULd32i16 (MOVXd32d16 $dst), imm:$opd), 8), 8),
MxSubRegIndex16Lo)>;
-def : Pat<(mulhu i16:$dst, MximmSExt16:$opd),
+def : Pat<(mulhu i16:$dst, Mxi16immSExt16:$opd),
(EXTRACT_SUBREG
(LSR32di (LSR32di (UMULd32i16 (MOVXd32d16 $dst), imm:$opd), 8), 8),
MxSubRegIndex16Lo)>;
@@ -881,16 +873,16 @@ foreach N = ["add", "addc"] in {
(ADD32df MxDRD32:$src, MxType32.FOp:$opd)>;
// add reg, imm
- def : Pat<(!cast<SDNode>(N) i8: $src, MximmSExt8:$opd),
+ def : Pat<(!cast<SDNode>(N) i8: $src, Mxi8immSExt8:$opd),
(ADD8di MxDRD8 :$src, imm:$opd)>;
- def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd),
+ def : Pat<(!cast<SDNode>(N) i16:$src, Mxi16immSExt16:$opd),
(ADD16di MxDRD16:$src, imm:$opd)>;
// LEAp is more complex and thus will be selected over normal ADD32ri but it cannot
// be used with data registers, here by adding complexity to a simple ADD32ri insts
// we make sure it will be selected over LEAp
let AddedComplexity = 15 in {
- def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd),
+ def : Pat<(!cast<SDNode>(N) i32:$src, Mxi32immSExt32:$opd),
(ADD32di MxDRD32:$src, imm:$opd)>;
} // AddedComplexity = 15
@@ -949,11 +941,11 @@ foreach N = ["sub", "subc"] in {
(SUB32df MxDRD32:$src, MxType32.FOp:$opd)>;
// sub reg, imm
- def : Pat<(!cast<SDNode>(N) i8 :$src, MximmSExt8 :$opd),
+ def : Pat<(!cast<SDNode>(N) i8 :$src, Mxi8immSExt8 :$opd),
(SUB8di MxDRD8 :$src, imm:$opd)>;
- def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd),
+ def : Pat<(!cast<SDNode>(N) i16:$src, Mxi16immSExt16:$opd),
(SUB16di MxDRD16:$src, imm:$opd)>;
- def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd),
+ def : Pat<(!cast<SDNode>(N) i32:$src, Mxi32immSExt32:$opd),
(SUB32di MxDRD32:$src, imm:$opd)>;
// sub imm, (An)
@@ -982,11 +974,11 @@ multiclass BitwisePat<string INST, SDNode OP> {
def : Pat<(OP i32:$src, i32:$opd),
(!cast<MxInst>(INST#"32dd") MxDRD32:$src, MxDRD32:$opd)>;
// op reg, imm
- def : Pat<(OP i8: $src, MximmSExt8 :$opd),
+ def : Pat<(OP i8: $src, Mxi8immSExt8 :$opd),
(!cast<MxInst>(INST#"8di") MxDRD8 :$src, imm:$opd)>;
- def : Pat<(OP i16:$src, MximmSExt16:$opd),
+ def : Pat<(OP i16:$src, Mxi16immSExt16:$opd),
(!cast<MxInst>(INST#"16di") MxDRD16:$src, imm:$opd)>;
- def : Pat<(OP i32:$src, MximmSExt32:$opd),
+ def : Pat<(OP i32:$src, Mxi32immSExt32:$opd),
(!cast<MxInst>(INST#"32di") MxDRD32:$src, imm:$opd)>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td
index 624093661d19..fa7e7aa0ed46 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrData.td
@@ -554,18 +554,21 @@ def: Pat<(MxSExtLoadi16i8 MxCP_ARID:$src),
(EXTRACT_SUBREG (MOVSXd32p8 MxARID8:$src), MxSubRegIndex16Lo)>;
def: Pat<(MxSExtLoadi16i8 MxCP_ARII:$src),
(EXTRACT_SUBREG (MOVSXd32f8 MxARII8:$src), MxSubRegIndex16Lo)>;
+def: Pat<(MxSExtLoadi16i8 MxCP_PCD:$src), (MOVSXd16q8 MxPCD8:$src)>;
// i32 <- sext i8
def: Pat<(i32 (sext i8:$src)), (MOVSXd32d8 MxDRD8:$src)>;
def: Pat<(MxSExtLoadi32i8 MxCP_ARI :$src), (MOVSXd32j8 MxARI8 :$src)>;
def: Pat<(MxSExtLoadi32i8 MxCP_ARID:$src), (MOVSXd32p8 MxARID8:$src)>;
def: Pat<(MxSExtLoadi32i8 MxCP_ARII:$src), (MOVSXd32f8 MxARII8:$src)>;
+def: Pat<(MxSExtLoadi32i8 MxCP_PCD:$src), (MOVSXd32q8 MxPCD8:$src)>;
// i32 <- sext i16
def: Pat<(i32 (sext i16:$src)), (MOVSXd32d16 MxDRD16:$src)>;
def: Pat<(MxSExtLoadi32i16 MxCP_ARI :$src), (MOVSXd32j16 MxARI16 :$src)>;
def: Pat<(MxSExtLoadi32i16 MxCP_ARID:$src), (MOVSXd32p16 MxARID16:$src)>;
def: Pat<(MxSExtLoadi32i16 MxCP_ARII:$src), (MOVSXd32f16 MxARII16:$src)>;
+def: Pat<(MxSExtLoadi32i16 MxCP_PCD:$src), (MOVSXd32q16 MxPCD16:$src)>;
// i16 <- zext i8
def: Pat<(i16 (zext i8:$src)),
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td
index 38d3127ac6a6..99bac7a59939 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrFormats.td
@@ -17,22 +17,22 @@
/// 03 M68000 (An) j address register indirect
/// 04 M68000 (An)+ o address register indirect with postincrement
/// 05 M68000 -(An) e address register indirect with predecrement
-/// 06 M68000 (i,An) p address register indirect with displacement
-/// 10 M68000 (i,An,Xn.L) f address register indirect with index and scale = 1
-/// 07 M68000 (i,An,Xn.W) F address register indirect with index and scale = 1
-/// 12 M68020 (i,An,Xn.L,SCALE) g address register indirect with index
-/// 11 M68020 (i,An,Xn.W,SCALE) G address register indirect with index
+/// 06 M68000 (d16,An) p address register indirect with displacement
+/// 10 M68000 (d8,An,Xn.L) f address register indirect with index and scale = 1
+/// 07 M68000 (d8,An,Xn.W) F address register indirect with index and scale = 1
+/// 12 M68020 (d8,An,Xn.L,SCALE) g address register indirect with index
+/// 11 M68020 (d8,An,Xn.W,SCALE) G address register indirect with index
/// 14 M68020 ([bd,An],Xn.L,SCALE,od) u memory indirect postindexed mode
/// 13 M68020 ([bd,An],Xn.W,SCALE,od) U memory indirect postindexed mode
/// 16 M68020 ([bd,An,Xn.L,SCALE],od) v memory indirect preindexed mode
/// 15 M68020 ([bd,An,Xn.W,SCALE],od) V memory indirect preindexed mode
/// 20 M68000 abs.L b absolute long address
/// 17 M68000 abs.W B absolute short address
-/// 21 M68000 (i,PC) q program counter with displacement
-/// 23 M68000 (i,PC,Xn.L) k program counter with index and scale = 1
-/// 22 M68000 (i,PC,Xn.W) K program counter with index and scale = 1
-/// 25 M68020 (i,PC,Xn.L,SCALE) l program counter with index
-/// 24 M68020 (i,PC,Xn.W,SCALE) L program counter with index
+/// 21 M68000 (d16,PC) q program counter with displacement
+/// 23 M68000 (d8,PC,Xn.L) k program counter with index and scale = 1
+/// 22 M68000 (d8,PC,Xn.W) K program counter with index and scale = 1
+/// 25 M68020 (d8,PC,Xn.L,SCALE) l program counter with index
+/// 24 M68020 (d8,PC,Xn.W,SCALE) L program counter with index
/// 27 M68020 ([bd,PC],Xn.L,SCALE,od) x program counter memory indirect postindexed mode
/// 26 M68020 ([bd,PC],Xn.W,SCALE,od) X program counter memory indirect postindexed mode
/// 31 M68020 ([bd,PC,Xn.L,SCALE],od) y program counter memory indirect preindexed mode
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td
index dc66e103361a..84eb8e56da76 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -55,15 +55,6 @@ def MxSDT_BiArithCCRInOut : SDTypeProfile<2, 3, [
/* CCR */ SDTCisSameAs<1, 4>
]>;
-// RES1, RES2, CCR <- op LHS, RHS
-def MxSDT_2BiArithCCROut : SDTypeProfile<3, 2, [
- /* RES 1 */ SDTCisInt<0>,
- /* RES 2 */ SDTCisSameAs<0, 1>,
- /* CCR */ SDTCisVT<1, i8>,
- /* LHS */ SDTCisSameAs<0, 2>,
- /* RHS */ SDTCisSameAs<0, 3>
-]>;
-
def MxSDT_CmpTest : SDTypeProfile<1, 2, [
/* CCR */ SDTCisVT<0, i8>,
/* Ops */ SDTCisSameAs<1, 2>
@@ -134,7 +125,7 @@ def MxAddX : SDNode<"M68kISD::ADDX", MxSDT_BiArithCCRInOut>;
def MxSubX : SDNode<"M68kISD::SUBX", MxSDT_BiArithCCRInOut>;
def MxSMul : SDNode<"M68kISD::SMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>;
-def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_2BiArithCCROut, [SDNPCommutative]>;
+def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>;
def MxCmp : SDNode<"M68kISD::CMP", MxSDT_CmpTest>;
def MxBtst : SDNode<"M68kISD::BTST", MxSDT_CmpTest>;
@@ -522,9 +513,14 @@ def MxCP_PCI : ComplexPattern<iPTR, 2, "SelectPCI",
// Pattern Fragments
//===----------------------------------------------------------------------===//
-def MximmSExt8 : PatLeaf<(i8 imm)>;
-def MximmSExt16 : PatLeaf<(i16 imm)>;
-def MximmSExt32 : PatLeaf<(i32 imm)>;
+def Mxi8immSExt8 : PatLeaf<(i8 imm)>;
+def MximmSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+
+def Mxi16immSExt16 : PatLeaf<(i16 imm)>;
+def MximmSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+
+def Mxi32immSExt32 : PatLeaf<(i32 imm)>;
+def MximmSExt32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>;
// Used for Shifts and Rotations, since M68k immediates in these instructions
// are 1 <= i <= 8. Generally, if immediate is bigger than 8 it will be moved
@@ -717,7 +713,7 @@ foreach size = [8, 16, 32] in {
// #imm
def MxOp#size#AddrMode_i
: MxImmOpBundle<size, !cast<MxOperand>("Mxi"#size#"imm"),
- !cast<PatFrag>("MximmSExt"#size)>;
+ !cast<PatFrag>("Mxi"#size#"immSExt"#size)>;
} // foreach size = [8, 16, 32]
foreach size = [16, 32] in {
@@ -747,7 +743,7 @@ class MxType8Class<string rLet, MxOperand reg>
MxAL8, MxCP_AL,
MxPCD8, MxCP_PCD,
MxPCI8, MxCP_PCI,
- Mxi8imm, MximmSExt8,
+ Mxi8imm, Mxi8immSExt8,
Mxloadi8>;
def MxType8 : MxType8Class<?,?>;
@@ -762,7 +758,7 @@ class MxType16Class<string rLet, MxOperand reg>
MxAL16, MxCP_AL,
MxPCD16, MxCP_PCD,
MxPCI16, MxCP_PCI,
- Mxi16imm, MximmSExt16,
+ Mxi16imm, Mxi16immSExt16,
Mxloadi16>;
def MxType16 : MxType16Class<?,?>;
@@ -777,7 +773,7 @@ class MxType32Class<string rLet, MxOperand reg>
MxAL32, MxCP_AL,
MxPCD32, MxCP_PCD,
MxPCI32, MxCP_PCI,
- Mxi32imm, MximmSExt32,
+ Mxi32imm, Mxi32immSExt32,
Mxloadi32>;
def MxType32 : MxType32Class<?,?>;
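The immediate pattern leaves renamed above keep their old fixed-width matching (Mxi8immSExt8 and friends), while the MximmSExt* names are redefined to accept any immediate that fits when sign-extended, via isInt<N>. A minimal standalone sketch of that predicate, assuming an LLVM tree (not part of the patch):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    int main() {
      // isInt<8> is the check behind the redefined MximmSExt8 pattern leaf.
      assert(llvm::isInt<8>(127) && llvm::isInt<8>(-128)); // fits in a signed byte
      assert(!llvm::isInt<8>(128));                        // needs 9 signed bits
      return 0;
    }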
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index ee7762c296bf..d3b59138a5a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -964,7 +964,7 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
if (!isa<ConstantSDNode>(N->getOperand(1)))
return Op;
- uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t ShiftAmount = N->getConstantOperandVal(1);
// Expand the stuff into sequence of shifts.
SDValue Victim = N->getOperand(0);
@@ -1269,7 +1269,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
SDLoc dl(Op);
EVT PtrVT = Op.getValueType();
@@ -1295,7 +1295,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
MSP430::R4, VT);
while (Depth--)
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
index ac679c4c01bc..c0e7eef8dd9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -164,20 +164,20 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>, GPR_64;
/// Shift Instructions
let AdditionalPredicates = [NotInMicroMips] in {
- def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl,
+ def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, mshl_64,
immZExt6>,
SRA_FM<0x38, 0>, ISA_MIPS3;
- def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl,
+ def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, msrl_64,
immZExt6>,
SRA_FM<0x3a, 0>, ISA_MIPS3;
- def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra,
+ def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, msra_64,
immZExt6>,
SRA_FM<0x3b, 0>, ISA_MIPS3;
- def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>,
+ def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, mshl_64>,
SRLV_FM<0x14, 0>, ISA_MIPS3;
- def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>,
+ def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, msra_64>,
SRLV_FM<0x17, 0>, ISA_MIPS3;
- def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>,
+ def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, msrl_64>,
SRLV_FM<0x16, 0>, ISA_MIPS3;
def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>,
SRA_FM<0x3c, 0>, ISA_MIPS3;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 77ce8ba890a8..01b41f3b2159 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/CFG.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -324,6 +326,24 @@ bool MipsDAGToDAGISel::SelectInlineAsmMemoryOperand(
return true;
}
+bool MipsDAGToDAGISel::isUnneededShiftMask(SDNode *N,
+ unsigned ShAmtBits) const {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
+
+ const APInt &RHS = N->getConstantOperandAPInt(1);
+ if (RHS.countr_one() >= ShAmtBits) {
+ LLVM_DEBUG(
+ dbgs()
+ << DEBUG_TYPE
+ << " Need optimize 'and & shl/srl/sra' and operand value bits is "
+ << RHS.countr_one() << "\n");
+ return true;
+ }
+
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
+ return (Known.Zero | RHS).countr_one() >= ShAmtBits;
+}
+
char MipsDAGToDAGISel::ID = 0;
INITIALIZE_PASS(MipsDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
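isUnneededShiftMask() backs the new mshl/msrl/msra pattern fragments in MipsInstrCompiler.td: an 'and' on the shift amount may be dropped because MIPS shifts only read the low 5 (32-bit) or 6 (64-bit) bits of the amount register. A minimal sketch of the same test, assuming an LLVM tree (the helper name maskIsUnneeded is illustrative only):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Mask and AmtKnown describe the 'and' immediate and the known bits of the
    // shift amount; both are assumed to have the same bit width.
    static bool maskIsUnneeded(const APInt &Mask, const KnownBits &AmtKnown,
                               unsigned ShAmtBits) {
      // The mask keeps at least as many low bits as the shift reads...
      if (Mask.countr_one() >= ShAmtBits)
        return true;
      // ...or every bit it would clear is already known to be zero.
      return (AmtKnown.Zero | Mask).countr_one() >= ShAmtBits;
    }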
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index e41cb08712ca..52207d0f6284 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -143,6 +143,7 @@ private:
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
+ bool isUnneededShiftMask(SDNode *N, unsigned ShAmtBits) const;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
index a0cab8024386..483eba4e4f47 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -2508,7 +2508,7 @@ SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsTargetLowering::
lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// check the depth
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+ if (Op.getConstantOperandVal(0) != 0) {
DAG.getContext()->emitError(
"return address can be determined only for current frame");
return SDValue();
@@ -2529,7 +2529,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
return SDValue();
// check the depth
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+ if (Op.getConstantOperandVal(0) != 0) {
DAG.getContext()->emitError(
"return address can be determined only for current frame");
return SDValue();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td
new file mode 100644
index 000000000000..8ae3d71978b1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrCompiler.td
@@ -0,0 +1,33 @@
+//===- MipsInstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+
+def shiftMask_32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 5);
+}]>;
+
+def shiftMask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm), [{
+ return isUnneededShiftMask(N, 6);
+}]>;
+
+foreach width = [32, 64] in {
+defvar shiftMask = !cast<SDPatternOperator>("shiftMask_"#width);
+def mshl_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(shl node:$src0, node:$src1), (shl node:$src0, (shiftMask node:$src1))]>;
+
+def msrl_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(srl node:$src0, node:$src1), (srl node:$src0, (shiftMask node:$src1))]>;
+
+def msra_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(sra node:$src0, node:$src1), (sra node:$src0, (shiftMask node:$src1))]>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
index 75270857ea13..4b6f4b22e71b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
// Mips profiles and nodes
//===----------------------------------------------------------------------===//
+include "MipsInstrCompiler.td"
def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
@@ -2079,17 +2080,17 @@ let AdditionalPredicates = [NotInMicroMips] in {
let AdditionalPredicates = [NotInMicroMips] in {
/// Shift Instructions
- def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, shl,
+ def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, mshl_32,
immZExt5>, SRA_FM<0, 0>, ISA_MIPS1;
- def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, srl,
+ def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, msrl_32,
immZExt5>, SRA_FM<2, 0>, ISA_MIPS1;
- def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, sra,
+ def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, msra_32,
immZExt5>, SRA_FM<3, 0>, ISA_MIPS1;
- def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, shl>,
+ def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, mshl_32>,
SRLV_FM<4, 0>, ISA_MIPS1;
- def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, srl>,
+ def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, msrl_32>,
SRLV_FM<6, 0>, ISA_MIPS1;
- def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, sra>,
+ def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, msra_32>,
SRLV_FM<7, 0>, ISA_MIPS1;
// Rotate Instructions
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 8c865afd4207..0ed87ee0809a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -831,8 +831,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::INTRINSIC_W_CHAIN: {
- const unsigned IntrinsicOpcode =
- cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const unsigned IntrinsicOpcode = Node->getConstantOperandVal(1);
switch (IntrinsicOpcode) {
default:
break;
@@ -885,7 +884,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::INTRINSIC_WO_CHAIN: {
- switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) {
+ switch (Node->getConstantOperandVal(0)) {
default:
break;
@@ -901,8 +900,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::INTRINSIC_VOID: {
- const unsigned IntrinsicOpcode =
- cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const unsigned IntrinsicOpcode = Node->getConstantOperandVal(1);
switch (IntrinsicOpcode) {
default:
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 5c34067c8888..e9788fa7ed73 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1519,7 +1519,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT ResTy = Op->getValueType(0);
APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
- << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
+ << Op->getConstantOperandAPInt(2);
SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
@@ -1528,7 +1528,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
+ unsigned Intrinsic = Op->getConstantOperandVal(0);
switch (Intrinsic) {
default:
return SDValue();
@@ -2300,7 +2300,7 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ unsigned Intr = Op->getConstantOperandVal(1);
switch (Intr) {
default:
return SDValue();
@@ -2375,7 +2375,7 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
- unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ unsigned Intr = Op->getConstantOperandVal(1);
switch (Intr) {
default:
return SDValue();
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 894a8636f458..815c46edb6fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -513,7 +513,7 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
}
bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(1);
switch (IID) {
default:
return false;
@@ -730,7 +730,7 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
}
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
switch (IID) {
default:
return false;
@@ -1246,7 +1246,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
Op1 = N->getOperand(2);
Mem = cast<MemIntrinsicSDNode>(N);
- unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(1);
switch (IID) {
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b57d185bb638..ed96339240d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4902,8 +4902,7 @@ bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
return false;
if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
- cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() !=
- Intrinsic::loop_decrement)
+ LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
return false;
if (!isa<ConstantSDNode>(RHS))
@@ -6011,7 +6010,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Op #3 is the Dest MBB
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
- unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned PCC = N->getConstantOperandVal(1);
if (EnableBranchHint)
PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 385b3b74c34d..8f27e6677afa 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2817,8 +2817,8 @@ bool PPCTargetLowering::SelectAddressRegImm(
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
- && "Cannot handle constant offsets yet!");
+ assert(!N.getOperand(1).getConstantOperandVal(1) &&
+ "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
@@ -3824,8 +3824,7 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
// Check all operands that may contain the LR.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- const InlineAsm::Flag Flags(
- cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue());
+ const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
unsigned NumVals = Flags.getNumOperandRegisters();
++i; // Skip the ID value.
@@ -10442,8 +10441,7 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot, const PPCSubtarget &Subtarget) {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
@@ -10728,8 +10726,7 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
SDLoc dl(Op);
@@ -10947,7 +10944,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Unpack the result based on how the target uses it.
unsigned BitNo; // Bit # of CR6.
bool InvertBit; // Invert result?
- switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ switch (Op.getConstantOperandVal(1)) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Return the value of the EQ bit of CR6.
BitNo = 0; InvertBit = false;
@@ -10983,7 +10980,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
// the beginning of the argument list.
int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
SDLoc DL(Op);
- switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
+ switch (Op.getConstantOperandVal(ArgStart)) {
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
SDValue Val = Op.getOperand(ArgStart + 1);
@@ -11548,7 +11545,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// Custom lower is only done for high or low doubleword.
- int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ int Idx = Op0.getConstantOperandVal(1);
if (Idx % 2 != 0)
return SDValue();
@@ -11717,8 +11714,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case ISD::INTRINSIC_W_CHAIN: {
- if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
- Intrinsic::loop_decrement)
+ if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
break;
assert(N->getValueType(0) == MVT::i1 &&
@@ -11734,7 +11730,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
- switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
+ switch (N->getConstantOperandVal(0)) {
case Intrinsic::ppc_pack_longdouble:
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
N->getOperand(2), N->getOperand(1)));
@@ -13654,7 +13650,7 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
EVT VT;
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
default: return false;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
@@ -13682,7 +13678,7 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
if (N->getOpcode() == ISD::INTRINSIC_VOID) {
EVT VT;
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
default: return false;
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
@@ -15546,8 +15542,7 @@ SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
}
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Intrin.getOperand(1))->getZExtValue();
+ unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
if (IntrinsicID == Intrinsic::ppc_stdcx)
StoreWidth = 8;
else if (IntrinsicID == Intrinsic::ppc_stwcx)
@@ -15979,7 +15974,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::INTRINSIC_WO_CHAIN: {
bool isLittleEndian = Subtarget.isLittleEndian();
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl);
if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
@@ -15992,36 +15987,34 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
.zext(Add.getScalarValueSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode *U : BasePtr->uses()) {
- if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
- // We've found another LVSL/LVSR, and this address is an aligned
- // multiple of that one. The results will be the same, so use the
- // one we've just found instead.
+ if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ U->getConstantOperandVal(0) == IID) {
+ // We've found another LVSL/LVSR, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
- return SDValue(U, 0);
- }
+ return SDValue(U, 0);
+ }
}
}
if (isa<ConstantSDNode>(Add->getOperand(1))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode *U : BasePtr->uses()) {
- if (U->getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(U->getOperand(1)) &&
- (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
- cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
- (1ULL << Bits) ==
- 0) {
- SDNode *OtherAdd = U;
- for (SDNode *V : OtherAdd->uses()) {
- if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
- IID) {
- return SDValue(V, 0);
- }
+ if (U->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(U->getOperand(1)) &&
+ (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
+ (1ULL << Bits) ==
+ 0) {
+ SDNode *OtherAdd = U;
+ for (SDNode *V : OtherAdd->uses()) {
+ if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ V->getConstantOperandVal(0) == IID) {
+ return SDValue(V, 0);
}
}
}
+ }
}
}
@@ -16061,30 +16054,30 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::INTRINSIC_W_CHAIN:
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
- default:
- break;
- case Intrinsic::ppc_altivec_vsum4sbs:
- case Intrinsic::ppc_altivec_vsum4shs:
- case Intrinsic::ppc_altivec_vsum4ubs: {
- // These sum-across intrinsics only have a chain due to the side effect
- // that they may set the SAT bit. If we know the SAT bit will not be set
- // for some inputs, we can replace any uses of their chain with the input
- // chain.
- if (BuildVectorSDNode *BVN =
- dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- bool BVNIsConstantSplat = BVN->isConstantSplat(
- APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
- !Subtarget.isLittleEndian());
- // If the constant splat vector is 0, the SAT bit will not be set.
- if (BVNIsConstantSplat && APSplatBits == 0)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
+ switch (N->getConstantOperandVal(1)) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_vsum4sbs:
+ case Intrinsic::ppc_altivec_vsum4shs:
+ case Intrinsic::ppc_altivec_vsum4ubs: {
+ // These sum-across intrinsics only have a chain due to the side effect
+ // that they may set the SAT bit. If we know the SAT bit will not be set
+ // for some inputs, we can replace any uses of their chain with the
+ // input chain.
+ if (BuildVectorSDNode *BVN =
+ dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool BVNIsConstantSplat = BVN->isConstantSplat(
+ APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
+ !Subtarget.isLittleEndian());
+ // If the constant splat vector is 0, the SAT bit will not be set.
+ if (BVNIsConstantSplat && APSplatBits == 0)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
+ }
+ return SDValue();
}
- return SDValue();
- }
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvd2x:
// For little endian, VSX loads require generating lxvd2x/xxswapd.
@@ -16098,7 +16091,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Subtarget.needsSwapsForVSXMemOps()) {
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
default:
break;
case Intrinsic::ppc_vsx_stxvw4x:
@@ -16327,7 +16320,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
- switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ switch (LHS.getConstantOperandVal(1)) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Branch on the value of the EQ bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
@@ -16406,7 +16399,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
- switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ switch (Op.getConstantOperandVal(0)) {
default: break;
case Intrinsic::ppc_altivec_vcmpbfp_p:
case Intrinsic::ppc_altivec_vcmpeqfp_p:
@@ -16433,7 +16426,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
case ISD::INTRINSIC_W_CHAIN: {
- switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ switch (Op.getConstantOperandVal(1)) {
default:
break;
case Intrinsic::ppc_load2r:
@@ -16868,7 +16861,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
return SDValue();
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
// Make sure the function does not optimize away the store of the RA to
// the stack.
@@ -16901,7 +16894,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -18086,8 +18079,7 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
else
FlagSet |= PPC::MOF_RPlusR; // Register.
- } else if (RHS.getOpcode() == PPCISD::Lo &&
- !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
+ } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
else
FlagSet |= PPC::MOF_RPlusR;
@@ -18131,7 +18123,7 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
unsigned ParentOp = Parent->getOpcode();
if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
(ParentOp == ISD::INTRINSIC_VOID))) {
- unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
+ unsigned ID = Parent->getConstantOperandVal(1);
if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
? Parent->getOperand(2)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index f3ea0f597eec..4759aa951664 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1832,57 +1832,18 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
if (getParser().parseIdentifier(Identifier))
return ParseStatus::Failure;
- // Check for CSR names conflicts.
- // Custom CSR names might conflict with CSR names in privileged spec.
- // E.g. - SiFive mnscratch(0x350) and privileged spec mnscratch(0x740).
- auto CheckCSRNameConflict = [&]() {
- if (!(RISCVSysReg::lookupSysRegByName(Identifier))) {
- Error(S, "system register use requires an option to be enabled");
- return true;
- }
- return false;
- };
-
- // First check for vendor specific CSRs.
- auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByName(Identifier);
- if (SiFiveReg) {
- if (SiFiveReg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) {
- Operands.push_back(
- RISCVOperand::createSysReg(Identifier, S, SiFiveReg->Encoding));
- return ParseStatus::Success;
- }
- if (CheckCSRNameConflict())
- return ParseStatus::Failure;
- }
-
auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
if (!SysReg)
+ SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier);
+ if (!SysReg)
if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier)))
Warning(S, "'" + Identifier + "' is a deprecated alias for '" +
SysReg->Name + "'");
- // Check for CSR encoding conflicts.
- // Custom CSR encoding might conflict with CSR encoding in privileged spec.
- // E.g. - SiFive mnscratch(0x350) and privileged spec miselect(0x350).
- auto CheckCSREncodingConflict = [&]() {
- auto Reg = RISCVSysReg::lookupSiFiveRegByEncoding(SysReg->Encoding);
- if (Reg && Reg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) {
- Warning(S, "'" + Identifier + "' CSR is not available on the current " +
- "subtarget. Instead '" + Reg->Name +
- "' CSR will be used.");
- Operands.push_back(
- RISCVOperand::createSysReg(Reg->Name, S, Reg->Encoding));
- return true;
- }
- return false;
- };
-
- // Accept a named SysReg if the required features are present.
+ // Accept a named Sys Reg if the required features are present.
if (SysReg) {
if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits()))
return Error(S, "system register use requires an option to be enabled");
- if (CheckCSREncodingConflict())
- return ParseStatus::Success;
Operands.push_back(
RISCVOperand::createSysReg(Identifier, S, SysReg->Encoding));
return ParseStatus::Success;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 53e2b6b4d94e..ed80da14c795 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -74,6 +74,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ MCRegister Reg = RISCV::X0 + RegNo;
+ if (Reg != RISCV::X1 && Reg != RISCV::X5)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -359,6 +370,10 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder);
+static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+
#include "RISCVGenDisassemblerTables.inc"
static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
@@ -373,6 +388,16 @@ static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
+static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5);
+ DecodeStatus Result = DecodeGPRX1X5RegisterClass(Inst, Rs1, Address, Decoder);
+ (void)Result;
+ assert(Result == MCDisassembler::Success && "Invalid register");
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -462,10 +487,8 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
return MCDisassembler::Success;
}
-// spimm is based on rlist now.
static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder) {
- // TODO: check if spimm matches rlist
Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -568,8 +591,6 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
TRY_TO_DECODE_FEATURE(
RISCV::FeatureVendorXSfvfnrclipxfqf, DecoderTableXSfvfnrclipxfqf32,
"SiFive FP32-to-int8 Ranged Clip Instructions opcode table");
- TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32,
- "Sifive CIE custom opcode table");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip,
DecoderTableXCVbitmanip32,
"CORE-V Bit Manipulation custom opcode table");
@@ -600,6 +621,8 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
TRY_TO_DECODE_AND_ADD_SP(!STI.hasFeature(RISCV::Feature64Bit),
DecoderTableRISCV32Only_16,
"RISCV32Only_16 table (16-bit Instruction)");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZicfiss, DecoderTableZicfiss16,
+ "RVZicfiss table (Shadow Stack)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmt, DecoderTableRVZcmt16,
"Zcmt table (16-bit Table Jump Instructions)");
TRY_TO_DECODE_FEATURE(
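The new Zicfiss decoder entries cover the shadow-stack push/pop-check instructions, whose rs1 field (bits [11:7]) may only name x1 (ra) or x5 (t0), as DecodeGPRX1X5RegisterClass enforces above. A standalone sketch of the field extraction in plain C++ (hypothetical helper, not part of the patch):

    #include <cstdint>

    // Mirrors fieldFromInstruction(Insn, 7, 5): a 5-bit register number taken
    // from bit 7 of the instruction word.
    static unsigned decodeRs1Field(uint32_t Insn) {
      return (Insn >> 7) & 0x1f;
    }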
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 28ec999157c6..079906d1958c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -101,7 +101,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR});
- if (ST.hasStdExtZbb()) {
+ if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) {
RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}});
// Widen s32 rotate amount to s64 so SDAG patterns will match.
if (ST.is64Bit())
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 66a46a485f53..74d0db545e55 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -27,7 +27,6 @@ extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
namespace RISCVSysReg {
#define GET_SysRegsList_IMPL
-#define GET_SiFiveRegsList_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVSysReg
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 30ed36525e29..c32210fc1419 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -401,6 +401,7 @@ int getLoadFPImm(APFloat FPImm);
namespace RISCVSysReg {
struct SysReg {
const char *Name;
+ const char *AltName;
const char *DeprecatedName;
unsigned Encoding;
// FIXME: add these additional fields when needed.
@@ -424,22 +425,9 @@ struct SysReg {
return true;
return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
}
-
- bool haveVendorRequiredFeatures(const FeatureBitset &ActiveFeatures) const {
- // Not in 32-bit mode.
- if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
- return false;
- // No required feature associated with the system register.
- if (FeaturesRequired.none())
- return false;
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
- }
};
-struct SiFiveReg : SysReg {};
-
#define GET_SysRegsList_DECL
-#define GET_SiFiveRegsList_DECL
#include "RISCVGenSearchableTables.inc"
} // end namespace RISCVSysReg
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 195dda0b8b14..bd899495812f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -121,11 +121,8 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
- auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm);
auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
- if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits()))
- markup(O, Markup::Register) << SiFiveReg->Name;
- else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
+ if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
markup(O, Markup::Register) << SysReg->Name;
else
markup(O, Markup::Register) << formatImm(Imm);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td
index 130a6ecc143d..3dd0b3723828 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -14,7 +14,7 @@
// RISCVISelLowering.cpp (CC_RISCV).
def CSR_ILP32_LP64
- : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
+ : CalleeSavedRegs<(add X1, X8, X9, (sequence "X%u", 18, 27))>;
def CSR_ILP32F_LP64F
: CalleeSavedRegs<(add CSR_ILP32_LP64,
@@ -29,7 +29,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// Interrupt handler needs to save/restore all registers that are used,
// both Caller and Callee saved registers.
-def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 31))>;
+def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 5, 31))>;
// Same as CSR_Interrupt, but including all 32-bit FP registers.
def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index a66dd135ae5f..59b202606dad 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -687,6 +687,28 @@ def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">,
AssemblerPredicate<(all_of FeatureStdExtZicond),
"'Zicond' (Integer Conditional Operations)">;
+def FeatureStdExtZimop : SubtargetFeature<"experimental-zimop", "HasStdExtZimop", "true",
+ "'Zimop' (May-Be-Operations)">;
+def HasStdExtZimop : Predicate<"Subtarget->hasStdExtZimop()">,
+ AssemblerPredicate<(all_of FeatureStdExtZimop),
+ "'Zimop' (May-Be-Operations)">;
+
+def FeatureStdExtZcmop : SubtargetFeature<"experimental-zcmop", "HasStdExtZcmop", "true",
+ "'Zcmop' (Compressed May-Be-Operations)",
+ [FeatureStdExtZca]>;
+def HasStdExtZcmop : Predicate<"Subtarget->hasStdExtZcmop()">,
+ AssemblerPredicate<(all_of FeatureStdExtZcmop),
+ "'Zcmop' (Compressed May-Be-Operations)">;
+
+def FeatureStdExtZicfiss
+ : SubtargetFeature<"experimental-zicfiss", "HasStdExtZicfiss", "true",
+ "'Zicfiss' (Shadow stack)",
+ [FeatureStdExtZicsr, FeatureStdExtZimop]>;
+def HasStdExtZicfiss : Predicate<"Subtarget->hasStdExtZicfiss()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicfiss),
+ "'Zicfiss' (Shadow stack)">;
+def NoHasStdExtZicfiss : Predicate<"!Subtarget->hasStdExtZicfiss()">;
+
def FeatureStdExtSmaia
: SubtargetFeature<"smaia", "HasStdExtSmaia", "true",
"'Smaia' (Smaia encompasses all added CSRs and all "
@@ -813,13 +835,6 @@ def HasVendorXSfvcp : Predicate<"Subtarget->hasVendorXSfvcp()">,
AssemblerPredicate<(all_of FeatureVendorXSfvcp),
"'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions)">;
-def FeatureVendorXSfcie
- : SubtargetFeature<"xsfcie", "HasVendorXSfcie", "true",
- "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
-def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">,
- AssemblerPredicate<(all_of FeatureVendorXSfcie),
- "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
-
def FeatureVendorXSfvqmaccdod
: SubtargetFeature<"xsfvqmaccdod", "HasVendorXSfvqmaccdod", "true",
"'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))",
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 098a320c9153..bfa3bf3cc74e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1360,7 +1360,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
break;
- uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t C2 = N0.getConstantOperandVal(1);
// Constant should be a mask.
if (!isMask_64(C2))
@@ -1604,7 +1604,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
}
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
// By default we do not custom select any intrinsic.
default:
@@ -1825,7 +1825,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
}
case ISD::INTRINSIC_VOID: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
switch (IntNo) {
case Intrinsic::riscv_vsseg2:
case Intrinsic::riscv_vsseg3:
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c2508a158837..03a59f8a8b57 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
- ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
- ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
+ ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
+ ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
@@ -7235,7 +7235,7 @@ SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
while (Depth--) {
int Offset = -(XLenInBytes * 2);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
@@ -7260,7 +7260,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
if (Depth) {
int Off = -XLenInBytes;
SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
@@ -11731,7 +11731,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
switch (IntNo) {
default:
llvm_unreachable(
@@ -12850,9 +12850,9 @@ struct CombineResult;
/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
-/// add_vl -> vwadd(u) | vwadd(u)_w
-/// sub_vl -> vwsub(u) | vwsub(u)_w
-/// mul_vl -> vwmul(u) | vwmul_su
+/// add | add_vl -> vwadd(u) | vwadd(u)_w
+/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
+/// mul | mul_vl -> vwmul(u) | vwmul_su
///
/// An object of this class represents an operand of the operation we want to
/// combine.
@@ -12897,6 +12897,8 @@ struct NodeExtensionHelper {
/// E.g., for zext(a), this would return a.
SDValue getSource() const {
switch (OrigOperand.getOpcode()) {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
return OrigOperand.getOperand(0);
@@ -12913,7 +12915,8 @@ struct NodeExtensionHelper {
/// Get or create a value that can feed \p Root with the given extension \p
/// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
/// \see ::getSource().
- SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
+ SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
std::optional<bool> SExt) const {
if (!SExt.has_value())
return OrigOperand;
@@ -12928,8 +12931,10 @@ struct NodeExtensionHelper {
// If we need an extension, we should be changing the type.
SDLoc DL(Root);
- auto [Mask, VL] = getMaskAndVL(Root);
+ auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
switch (OrigOperand.getOpcode()) {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
@@ -12969,12 +12974,15 @@ struct NodeExtensionHelper {
/// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
switch (Opcode) {
+ case ISD::ADD:
case RISCVISD::ADD_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
+ case ISD::MUL:
case RISCVISD::MUL_VL:
return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
+ case ISD::SUB:
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
@@ -12987,7 +12995,8 @@ struct NodeExtensionHelper {
/// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
/// newOpcode(a, b).
static unsigned getSUOpcode(unsigned Opcode) {
- assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
+ assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
+ "SU is only supported for MUL");
return RISCVISD::VWMULSU_VL;
}
@@ -12995,8 +13004,10 @@ struct NodeExtensionHelper {
/// newOpcode(a, b).
static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
switch (Opcode) {
+ case ISD::ADD:
case RISCVISD::ADD_VL:
return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
+ case ISD::SUB:
case RISCVISD::SUB_VL:
return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
default:
@@ -13006,19 +13017,33 @@ struct NodeExtensionHelper {
using CombineToTry = std::function<std::optional<CombineResult>(
SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
- const NodeExtensionHelper & /*RHS*/)>;
+ const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
+ const RISCVSubtarget &)>;
/// Check if this node needs to be fully folded or extended for all users.
bool needToPromoteOtherUsers() const { return EnforceOneUse; }
/// Helper method to set the various fields of this struct based on the
/// type of \p Root.
- void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
+ void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SupportsZExt = false;
SupportsSExt = false;
EnforceOneUse = true;
CheckMask = true;
- switch (OrigOperand.getOpcode()) {
+ unsigned Opc = OrigOperand.getOpcode();
+ switch (Opc) {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ if (OrigOperand.getValueType().isVector()) {
+ SupportsZExt = Opc == ISD::ZERO_EXTEND;
+ SupportsSExt = Opc == ISD::SIGN_EXTEND;
+ SDLoc DL(Root);
+ MVT VT = Root->getSimpleValueType(0);
+ std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
+ }
+ break;
+ }
case RISCVISD::VZEXT_VL:
SupportsZExt = true;
Mask = OrigOperand.getOperand(1);
@@ -13074,8 +13099,16 @@ struct NodeExtensionHelper {
}
/// Check if \p Root supports any extension folding combines.
- static bool isSupportedRoot(const SDNode *Root) {
+ static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) {
switch (Root->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(Root->getValueType(0)))
+ return false;
+ return Root->getValueType(0).isScalableVector();
+ }
case RISCVISD::ADD_VL:
case RISCVISD::MUL_VL:
case RISCVISD::VWADD_W_VL:
@@ -13090,9 +13123,10 @@ struct NodeExtensionHelper {
}
/// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
- NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
- assert(isSupportedRoot(Root) && "Trying to build an helper with an "
- "unsupported root");
+ NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an "
+ "unsupported root");
assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
OrigOperand = Root->getOperand(OperandIdx);
@@ -13108,7 +13142,7 @@ struct NodeExtensionHelper {
SupportsZExt =
Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
SupportsSExt = !SupportsZExt;
- std::tie(Mask, VL) = getMaskAndVL(Root);
+ std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
CheckMask = true;
// There's no existing extension here, so we don't have to worry about
// making sure it gets removed.
@@ -13117,7 +13151,7 @@ struct NodeExtensionHelper {
}
[[fallthrough]];
default:
- fillUpExtensionSupport(Root, DAG);
+ fillUpExtensionSupport(Root, DAG, Subtarget);
break;
}
}
@@ -13133,14 +13167,27 @@ struct NodeExtensionHelper {
}
/// Helper function to get the Mask and VL from \p Root.
- static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
- assert(isSupportedRoot(Root) && "Unexpected root");
- return std::make_pair(Root->getOperand(3), Root->getOperand(4));
+ static std::pair<SDValue, SDValue>
+ getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(isSupportedRoot(Root, DAG) && "Unexpected root");
+ switch (Root->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: {
+ SDLoc DL(Root);
+ MVT VT = Root->getSimpleValueType(0);
+ return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
+ }
+ default:
+ return std::make_pair(Root->getOperand(3), Root->getOperand(4));
+ }
}
/// Check if the Mask and VL of this operand are compatible with \p Root.
- bool areVLAndMaskCompatible(const SDNode *Root) const {
- auto [Mask, VL] = getMaskAndVL(Root);
+ bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) const {
+ auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
return isMaskCompatible(Mask) && isVLCompatible(VL);
}
@@ -13148,11 +13195,14 @@ struct NodeExtensionHelper {
/// foldings that are supported by this class.
static bool isCommutative(const SDNode *N) {
switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::MUL:
case RISCVISD::ADD_VL:
case RISCVISD::MUL_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return true;
+ case ISD::SUB:
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
@@ -13197,14 +13247,25 @@ struct CombineResult {
/// Return a value that uses TargetOpcode and that can be used to replace
/// Root.
/// The actual replacement is *not* done in that method.
- SDValue materialize(SelectionDAG &DAG) const {
+ SDValue materialize(SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) const {
SDValue Mask, VL, Merge;
- std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
- Merge = Root->getOperand(2);
+ std::tie(Mask, VL) =
+ NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
+ switch (Root->getOpcode()) {
+ default:
+ Merge = Root->getOperand(2);
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ Merge = DAG.getUNDEF(Root->getValueType(0));
+ break;
+ }
return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
- LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
- RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
- Mask, VL);
+ LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS),
+ RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS),
+ Merge, Mask, VL);
}
};
@@ -13221,15 +13282,16 @@ struct CombineResult {
static std::optional<CombineResult>
canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
const NodeExtensionHelper &RHS, bool AllowSExt,
- bool AllowZExt) {
+ bool AllowZExt, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
- if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
+ if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
+ !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
return std::nullopt;
if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
Root->getOpcode(), /*IsSExt=*/false),
- Root, LHS, /*SExtLHS=*/false, RHS,
- /*SExtRHS=*/false);
+ Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false);
if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
Root->getOpcode(), /*IsSExt=*/true),
@@ -13246,9 +13308,10 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
- /*AllowZExt=*/true);
+ /*AllowZExt=*/true, DAG, Subtarget);
}
/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
@@ -13257,8 +13320,9 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
- if (!RHS.areVLAndMaskCompatible(Root))
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
return std::nullopt;
// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
@@ -13282,9 +13346,10 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
- /*AllowZExt=*/false);
+ /*AllowZExt=*/false, DAG, Subtarget);
}
/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
@@ -13293,9 +13358,10 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
- /*AllowZExt=*/true);
+ /*AllowZExt=*/true, DAG, Subtarget);
}
/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
@@ -13304,10 +13370,13 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+
if (!LHS.SupportsSExt || !RHS.SupportsZExt)
return std::nullopt;
- if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
+ if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
+ !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
return std::nullopt;
return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
@@ -13317,6 +13386,8 @@ SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
SmallVector<CombineToTry> Strategies;
switch (Root->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
case RISCVISD::ADD_VL:
case RISCVISD::SUB_VL:
// add|sub -> vwadd(u)|vwsub(u)
@@ -13324,6 +13395,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
// add|sub -> vwadd(u)_w|vwsub(u)_w
Strategies.push_back(canFoldToVW_W);
break;
+ case ISD::MUL:
case RISCVISD::MUL_VL:
// mul -> vwmul(u)
Strategies.push_back(canFoldToVWWithSameExtension);
@@ -13354,12 +13426,14 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
-static SDValue
-combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
- assert(NodeExtensionHelper::isSupportedRoot(N) &&
- "Shouldn't have called this method");
+ if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
+ return SDValue();
+
SmallVector<SDNode *> Worklist;
SmallSet<SDNode *, 8> Inserted;
Worklist.push_back(N);
@@ -13368,11 +13442,11 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
while (!Worklist.empty()) {
SDNode *Root = Worklist.pop_back_val();
- if (!NodeExtensionHelper::isSupportedRoot(Root))
+ if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
return SDValue();
- NodeExtensionHelper LHS(N, 0, DAG);
- NodeExtensionHelper RHS(N, 1, DAG);
+ NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
+ NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
auto AppendUsersIfNeeded = [&Worklist,
&Inserted](const NodeExtensionHelper &Op) {
if (Op.needToPromoteOtherUsers()) {
@@ -13399,7 +13473,8 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (NodeExtensionHelper::CombineToTry FoldingStrategy :
FoldingStrategies) {
- std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
+ std::optional<CombineResult> Res =
+ FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
if (Res) {
Matched = true;
CombinesToApply.push_back(*Res);
@@ -13428,7 +13503,7 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
ValuesToReplace.reserve(CombinesToApply.size());
for (CombineResult Res : CombinesToApply) {
- SDValue NewValue = Res.materialize(DAG);
+ SDValue NewValue = Res.materialize(DAG, Subtarget);
if (!InputRootReplacement) {
assert(Res.Root == N &&
"First element is expected to be the current node");
@@ -14078,7 +14153,7 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
for (SDNode *U : N0->uses()) {
if (U->getOpcode() != ISD::SRA ||
!isa<ConstantSDNode>(U->getOperand(1)) ||
- cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
+ U->getConstantOperandVal(1) > 32)
return SDValue();
}
@@ -14700,13 +14775,20 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- assert(N->getOpcode() == RISCVISD::ADD_VL);
+
+ assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
+
+ if (N->getValueType(0).isFixedLengthVector())
+ return SDValue();
+
SDValue Addend = N->getOperand(0);
SDValue MulOp = N->getOperand(1);
- SDValue AddMergeOp = N->getOperand(2);
- if (!AddMergeOp.isUndef())
- return SDValue();
+ if (N->getOpcode() == RISCVISD::ADD_VL) {
+ SDValue AddMergeOp = N->getOperand(2);
+ if (!AddMergeOp.isUndef())
+ return SDValue();
+ }
auto IsVWMulOpc = [](unsigned Opc) {
switch (Opc) {
@@ -14730,8 +14812,16 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
if (!MulMergeOp.isUndef())
return SDValue();
- SDValue AddMask = N->getOperand(3);
- SDValue AddVL = N->getOperand(4);
+ auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (N->getOpcode() == ISD::ADD) {
+ SDLoc DL(N);
+ return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
+ Subtarget);
+ }
+ return std::make_pair(N->getOperand(3), N->getOperand(4));
+ }(N, DAG, Subtarget);
+
SDValue MulMask = MulOp.getOperand(3);
SDValue MulVL = MulOp.getOperand(4);
@@ -14997,10 +15087,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(ISD::AND, DL, VT, NewFMV,
DAG.getConstant(~SignBit, DL, VT));
}
- case ISD::ADD:
+ case ISD::ADD: {
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
+ if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
+ return V;
return performADDCombine(N, DAG, Subtarget);
- case ISD::SUB:
+ }
+ case ISD::SUB: {
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
return performSUBCombine(N, DAG, Subtarget);
+ }
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);
case ISD::OR:
@@ -15008,6 +15106,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR:
return performXORCombine(N, DAG, Subtarget);
case ISD::MUL:
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
return performMULCombine(N, DAG);
case ISD::FADD:
case ISD::UMAX:
@@ -15484,7 +15584,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::ADD_VL:
- if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
return combineToVWMACC(N, DAG, Subtarget);
case RISCVISD::SUB_VL:
@@ -15493,7 +15593,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
case RISCVISD::MUL_VL:
- return combineBinOp_VLToVWBinOp_VL(N, DCI);
+ return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
case RISCVISD::VFMADD_VL:
case RISCVISD::VFNMADD_VL:
case RISCVISD::VFMSUB_VL:
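
The NodeExtensionHelper changes above teach the existing *_VL widening combine to also accept plain ISD::ADD/SUB/MUL over legal scalable vector types, so sequences like add(sext(a), zext(b)) can be selected as a single widening instruction. The value-preservation argument is pure arithmetic: every result of the narrow operation, once both inputs are extended, fits in twice the element width. A standalone exhaustive check for the 8-to-16-bit case (the same bound holds at each SEW step); this is illustrative arithmetic, not LLVM code:

#include <cstdint>
#include <cstdio>

int main() {
  for (int a = -128; a <= 127; ++a) {
    for (int b = -128; b <= 127; ++b) {
      long ss_add = (long)a + b;                    // vwadd:   sext + sext
      long ss_sub = (long)a - b;                    // vwsub:   sext - sext
      long ss_mul = (long)a * b;                    // vwmul:   sext * sext
      long uu_add = (long)(uint8_t)a + (uint8_t)b;  // vwaddu:  zext + zext
      long su_mul = (long)a * (uint8_t)b;           // vwmulsu: sext * zext
      bool ok = ss_add >= INT16_MIN && ss_add <= INT16_MAX &&
                ss_sub >= INT16_MIN && ss_sub <= INT16_MAX &&
                ss_mul >= INT16_MIN && ss_mul <= INT16_MAX &&
                uu_add >= 0 && uu_add <= UINT16_MAX &&
                su_mul >= INT16_MIN && su_mul <= INT16_MAX;
      if (!ok) { std::puts("overflow"); return 1; }
    }
  }
  std::puts("every 8-bit result fits in 16 bits");
  return 0;
}
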
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
index de2227f82192..e487cc8b2e20 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
@@ -198,13 +198,23 @@ char RISCVInsertWriteVXRM::ID = 0;
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
false, false)
+static bool ignoresVXRM(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VNCLIP_WI:
+ case RISCV::VNCLIPU_WI:
+ return MI.getOperand(3).getImm() == 0;
+ }
+}
+
bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
bool NeedVXRMWrite = false;
for (const MachineInstr &MI : MBB) {
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
- if (VXRMIdx >= 0) {
+ if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
if (!BBInfo.VXRMUse.isValid())
@@ -356,7 +366,7 @@ void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
for (MachineInstr &MI : MBB) {
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
- if (VXRMIdx >= 0) {
+ if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
if (PendingInsert || !Info.isStatic() ||
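
The new ignoresVXRM() helper above relies on a property of the RVV fixed-point rounding step: the rounding increment is computed only from the bits discarded by the right shift, so vnclip[u].wi with a shift amount of 0 discards nothing and yields the same result under all four VXRM modes. A small standalone model of that rounding step, purely illustrative:

#include <cstdint>
#include <cstdio>

// result = (v >> d) + r, where r depends on the VXRM mode and the d
// discarded low bits (rnu/rne/rdn/rod formulas from the RVV spec).
static uint64_t roundoff(uint64_t v, unsigned d, unsigned vxrm) {
  uint64_t lost = d ? (v & ((UINT64_C(1) << d) - 1)) : 0;      // discarded bits
  uint64_t msb  = d ? ((v >> (d - 1)) & 1) : 0;                // top discarded bit
  uint64_t rest = d > 1 ? (v & ((UINT64_C(1) << (d - 1)) - 1)) : 0;
  uint64_t q    = v >> d;
  uint64_t r = 0;
  switch (vxrm) {
  case 0: r = msb; break;                              // rnu
  case 1: r = msb & ((rest != 0) | (q & 1)); break;    // rne
  case 2: r = 0; break;                                // rdn
  case 3: r = (!(q & 1)) & (lost != 0); break;         // rod
  }
  return q + r;
}

int main() {
  uint64_t v = 0x1234;
  for (unsigned m = 0; m < 4; ++m)
    std::printf("d=0 vxrm=%u -> %#llx\n", m,
                (unsigned long long)roundoff(v, 0, m));
  // All four lines print the same value: with d == 0, rounding is a no-op,
  // which is exactly what ignoresVXRM() encodes for a zero shift immediate.
  return 0;
}
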
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1dcff7eb563e..cd98438eed88 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2282,9 +2282,14 @@ bool RISCVInstrInfo::shouldClusterMemOps(
return false;
}
- // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
- // indicate they likely share a cache line.
- return ClusterSize <= 4;
+ unsigned CacheLineSize =
+ BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
+ // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
+ CacheLineSize = CacheLineSize ? CacheLineSize : 64;
+ // Cluster if the memory operations are on the same or a neighbouring cache
+ // line, but limit the maximum ClusterSize to avoid creating too much
+ // additional register pressure.
+ return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
}
// Set BaseReg (the base register operand), Offset (the byte offset being
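
The revised clustering heuristic above only groups memory operations whose offsets lie within one cache line of each other, defaulting to 64 bytes when the subtarget does not report a line size. Restated as a standalone predicate (the names and signature are illustrative; the real TargetInstrInfo hook also receives the operand lists and performs the checks shown earlier in the function):

#include <cstdint>
#include <cstdlib>

bool shouldClusterByOffset(int64_t Offset1, int64_t Offset2,
                           unsigned ClusterSize, unsigned CacheLineSize) {
  if (CacheLineSize == 0)
    CacheLineSize = 64;                 // assume 64 bytes if unset
  // Keep clusters small and only pair accesses that can share or neighbour
  // a cache line.
  return ClusterSize <= 4 &&
         std::llabs(Offset1 - Offset2) < (int64_t)CacheLineSize;
}

With a 64-byte line, offsets 0 and 48 still cluster, while offsets 0 and 80 no longer do.
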
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index edc08187d8f7..35e8edf5d2fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2111,13 +2111,16 @@ include "RISCVInstrInfoZk.td"
include "RISCVInstrInfoV.td"
include "RISCVInstrInfoZvk.td"
-// Integer
-include "RISCVInstrInfoZicbo.td"
-include "RISCVInstrInfoZicond.td"
-
// Compressed
include "RISCVInstrInfoC.td"
include "RISCVInstrInfoZc.td"
+include "RISCVInstrInfoZcmop.td"
+
+// Integer
+include "RISCVInstrInfoZimop.td"
+include "RISCVInstrInfoZicbo.td"
+include "RISCVInstrInfoZicond.td"
+include "RISCVInstrInfoZicfiss.td"
//===----------------------------------------------------------------------===//
// Vendor extensions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 488ffa73f4e4..30deeaa06448 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -257,13 +257,13 @@ class SegRegClass<LMULInfo m, int nf> {
// Vector register and vector group type information.
//===----------------------------------------------------------------------===//
-class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
+class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, LMULInfo M,
ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> {
ValueType Vector = Vec;
ValueType Mask = Mas;
int SEW = Sew;
int Log2SEW = !logtwo(Sew);
- VReg RegClass = Reg;
+ VReg RegClass = M.vrclass;
LMULInfo LMul = M;
ValueType Scalar = Scal;
RegisterClass ScalarRegClass = ScalarReg;
@@ -279,9 +279,9 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
}
class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew,
- VReg Reg, LMULInfo M, ValueType Scal = XLenVT,
+ LMULInfo M, ValueType Scal = XLenVT,
RegisterClass ScalarReg = GPR>
- : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg> {
+ : VTypeInfo<Vec, Mas, Sew, M, Scal, ScalarReg> {
ValueType VectorM1 = VecM1;
}
@@ -289,70 +289,70 @@ defset list<VTypeInfo> AllVectors = {
defset list<VTypeInfo> AllIntegerVectors = {
defset list<VTypeInfo> NoGroupIntegerVectors = {
defset list<VTypeInfo> FractionalGroupIntegerVectors = {
- def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, VR, V_MF8>;
- def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, VR, V_MF4>;
- def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, VR, V_MF2>;
- def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, VR, V_MF4>;
- def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, VR, V_MF2>;
- def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, VR, V_MF2>;
+ def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, V_MF8>;
+ def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, V_MF4>;
+ def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, V_MF2>;
+ def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, V_MF4>;
+ def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, V_MF2>;
+ def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, V_MF2>;
}
- def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, VR, V_M1>;
- def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, VR, V_M1>;
- def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, VR, V_M1>;
- def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, VR, V_M1>;
+ def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, V_M1>;
+ def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, V_M1>;
+ def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, V_M1>;
+ def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, V_M1>;
}
defset list<GroupVTypeInfo> GroupIntegerVectors = {
- def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, VRM2, V_M2>;
- def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, VRM4, V_M4>;
- def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, VRM8, V_M8>;
+ def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, V_M2>;
+ def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, V_M4>;
+ def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, V_M8>;
- def VI16M2: GroupVTypeInfo<vint16m2_t,vint16m1_t,vbool8_t, 16,VRM2, V_M2>;
- def VI16M4: GroupVTypeInfo<vint16m4_t,vint16m1_t,vbool4_t, 16,VRM4, V_M4>;
- def VI16M8: GroupVTypeInfo<vint16m8_t,vint16m1_t,vbool2_t, 16,VRM8, V_M8>;
+ def VI16M2: GroupVTypeInfo<vint16m2_t, vint16m1_t, vbool8_t, 16, V_M2>;
+ def VI16M4: GroupVTypeInfo<vint16m4_t, vint16m1_t, vbool4_t, 16, V_M4>;
+ def VI16M8: GroupVTypeInfo<vint16m8_t, vint16m1_t, vbool2_t, 16, V_M8>;
- def VI32M2: GroupVTypeInfo<vint32m2_t,vint32m1_t,vbool16_t,32,VRM2, V_M2>;
- def VI32M4: GroupVTypeInfo<vint32m4_t,vint32m1_t,vbool8_t, 32,VRM4, V_M4>;
- def VI32M8: GroupVTypeInfo<vint32m8_t,vint32m1_t,vbool4_t, 32,VRM8, V_M8>;
+ def VI32M2: GroupVTypeInfo<vint32m2_t, vint32m1_t, vbool16_t, 32, V_M2>;
+ def VI32M4: GroupVTypeInfo<vint32m4_t, vint32m1_t, vbool8_t, 32, V_M4>;
+ def VI32M8: GroupVTypeInfo<vint32m8_t, vint32m1_t, vbool4_t, 32, V_M8>;
- def VI64M2: GroupVTypeInfo<vint64m2_t,vint64m1_t,vbool32_t,64,VRM2, V_M2>;
- def VI64M4: GroupVTypeInfo<vint64m4_t,vint64m1_t,vbool16_t,64,VRM4, V_M4>;
- def VI64M8: GroupVTypeInfo<vint64m8_t,vint64m1_t,vbool8_t, 64,VRM8, V_M8>;
+ def VI64M2: GroupVTypeInfo<vint64m2_t, vint64m1_t, vbool32_t, 64, V_M2>;
+ def VI64M4: GroupVTypeInfo<vint64m4_t, vint64m1_t, vbool16_t, 64, V_M4>;
+ def VI64M8: GroupVTypeInfo<vint64m8_t, vint64m1_t, vbool8_t, 64, V_M8>;
}
}
defset list<VTypeInfo> AllFloatVectors = {
defset list<VTypeInfo> NoGroupFloatVectors = {
defset list<VTypeInfo> FractionalGroupFloatVectors = {
- def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, VR, V_MF4, f16, FPR16>;
- def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, VR, V_MF2, f16, FPR16>;
- def VF32MF2: VTypeInfo<vfloat32mf2_t,vbool64_t, 32, VR, V_MF2, f32, FPR32>;
+ def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, V_MF4, f16, FPR16>;
+ def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, V_MF2, f16, FPR16>;
+ def VF32MF2: VTypeInfo<vfloat32mf2_t, vbool64_t, 32, V_MF2, f32, FPR32>;
}
- def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, VR, V_M1, f16, FPR16>;
- def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, VR, V_M1, f32, FPR32>;
- def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, VR, V_M1, f64, FPR64>;
+ def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, V_M1, f16, FPR16>;
+ def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, V_M1, f32, FPR32>;
+ def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, V_M1, f64, FPR64>;
}
defset list<GroupVTypeInfo> GroupFloatVectors = {
def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16,
- VRM2, V_M2, f16, FPR16>;
+ V_M2, f16, FPR16>;
def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16,
- VRM4, V_M4, f16, FPR16>;
+ V_M4, f16, FPR16>;
def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16,
- VRM8, V_M8, f16, FPR16>;
+ V_M8, f16, FPR16>;
def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32,
- VRM2, V_M2, f32, FPR32>;
+ V_M2, f32, FPR32>;
def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t, 32,
- VRM4, V_M4, f32, FPR32>;
+ V_M4, f32, FPR32>;
def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t, 32,
- VRM8, V_M8, f32, FPR32>;
+ V_M8, f32, FPR32>;
def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64,
- VRM2, V_M2, f64, FPR64>;
+ V_M2, f64, FPR64>;
def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, vbool16_t, 64,
- VRM4, V_M4, f64, FPR64>;
+ V_M4, f64, FPR64>;
def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t, 64,
- VRM8, V_M8, f64, FPR64>;
+ V_M8, f64, FPR64>;
}
}
}
@@ -360,19 +360,19 @@ defset list<VTypeInfo> AllVectors = {
defset list<VTypeInfo> AllBFloatVectors = {
defset list<VTypeInfo> NoGroupBFloatVectors = {
defset list<VTypeInfo> FractionalGroupBFloatVectors = {
- def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>;
- def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>;
+ def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, V_MF4, bf16, FPR16>;
+ def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, V_MF2, bf16, FPR16>;
}
- def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>;
+ def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, V_M1, bf16, FPR16>;
}
defset list<GroupVTypeInfo> GroupBFloatVectors = {
def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16,
- VRM2, V_M2, bf16, FPR16>;
+ V_M2, bf16, FPR16>;
def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16,
- VRM4, V_M4, bf16, FPR16>;
+ V_M4, bf16, FPR16>;
def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16,
- VRM8, V_M8, bf16, FPR16>;
+ V_M8, bf16, FPR16>;
}
}
@@ -1069,7 +1069,8 @@ class VPseudoUnaryMask<VReg RetClass,
class VPseudoUnaryMaskRoundingMode<VReg RetClass,
VReg OpClass,
- string Constraint = ""> :
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
VMaskOp:$vm, ixlenimm:$rm,
@@ -1079,6 +1080,7 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass,
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1106,7 +1108,8 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass,
class VPseudoUnaryNoMask_FRM<VReg RetClass,
VReg OpClass,
- string Constraint = ""> :
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm,
AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
@@ -1115,6 +1118,7 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass,
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1123,7 +1127,8 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass,
class VPseudoUnaryMask_FRM<VReg RetClass,
VReg OpClass,
- string Constraint = ""> :
+ string Constraint = "",
+ int TargetConstraintType = 1> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
VMaskOp:$vm, ixlenimm:$frm,
@@ -1133,6 +1138,7 @@ class VPseudoUnaryMask_FRM<VReg RetClass,
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 1;
@@ -1528,7 +1534,8 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
bit CarryIn,
- string Constraint> :
+ string Constraint,
+ int TargetConstraintType = 1> :
Pseudo<(outs RetClass:$rd),
!if(CarryIn,
(ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1,
@@ -1540,6 +1547,7 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let TargetOverlapConstraintType = TargetConstraintType;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasVecPolicyOp = 0;
@@ -2447,10 +2455,11 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}
-multiclass VPseudoTiedBinaryV_VM<LMULInfo m> {
+multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> {
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, m.vrclass, m, 1, "">;
+ m.vrclass, m.vrclass, m, 1, "",
+ TargetConstraintType>;
}
multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
@@ -2462,10 +2471,11 @@ multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>;
}
-multiclass VPseudoTiedBinaryV_XM<LMULInfo m> {
+multiclass VPseudoTiedBinaryV_XM<LMULInfo m, int TargetConstraintType = 1> {
def "_VXM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, GPR, m, 1, "">;
+ m.vrclass, GPR, m, 1, "",
+ TargetConstraintType>;
}
multiclass VPseudoVMRG_FM {
@@ -2596,45 +2606,48 @@ multiclass VPseudoVRCP_V_RM {
}
}
-multiclass PseudoVEXT_VF2<int TargetConstraintType = 1> {
+multiclass PseudoVEXT_VF2 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF2 in {
defvar mx = m.MX;
+ defvar CurrTypeConstraints = !if(!or(!eq(mx, "MF4"), !eq(mx, "MF2"), !eq(mx, "M1")), 1, 3);
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>,
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, CurrTypeConstraints>,
SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>,
+ VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, CurrTypeConstraints>,
RISCVMaskedPseudo<MaskIdx=2>,
SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF4<int TargetConstraintType = 1> {
+multiclass PseudoVEXT_VF4 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF4 in {
defvar mx = m.MX;
+ defvar CurrTypeConstraints = !if(!or(!eq(mx, "MF2"), !eq(mx, "M1"), !eq(mx, "M2")), 1, 3);
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>,
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, CurrTypeConstraints>,
SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>,
+ VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, CurrTypeConstraints>,
RISCVMaskedPseudo<MaskIdx=2>,
SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
}
}
-multiclass PseudoVEXT_VF8<int TargetConstraintType = 1> {
+multiclass PseudoVEXT_VF8 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxListVF8 in {
defvar mx = m.MX;
+ defvar CurrTypeConstraints = !if(!or(!eq(mx, "M1"), !eq(mx, "M2"), !eq(mx, "M4")), 1, 3);
let VLMul = m.value in {
- def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>,
+ def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, CurrTypeConstraints>,
SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>,
+ VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, CurrTypeConstraints>,
RISCVMaskedPseudo<MaskIdx=2>,
SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>;
}
@@ -3619,7 +3632,7 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass,
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
- Constraint>,
+ Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3628,12 +3641,13 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass,
multiclass VPseudoConversionRM<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
- string Constraint = ""> {
+ string Constraint = "",
+ int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class,
- Constraint>;
+ Constraint, TargetConstraintType>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class,
- Constraint>,
+ Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3761,7 +3775,7 @@ multiclass VPseudoVNCVTI_W_RM {
multiclass VPseudoVNCVTI_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
+ defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX,
forceMergeOpRead=true>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 33bdc3366aa3..5b50a4a78c01 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2338,6 +2338,64 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+class VPatTruncSatClipMaxMinBase<string inst,
+ VTypeInfo vti,
+ VTypeInfo wti,
+ SDPatternOperator op1,
+ int op1_value,
+ SDPatternOperator op2,
+ int op2_value> :
+ Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op1
+ (wti.Vector (op2
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
+ (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+class VPatTruncSatClipUMin<VTypeInfo vti,
+ VTypeInfo wti,
+ int uminval> :
+ Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (riscv_umin_vl
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
+ SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
+ def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
+ def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
+}
+
+multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
+ defvar sew = vti.SEW;
+ defvar uminval = !sub(!shl(1, sew), 1);
+ defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+ defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
+ sminval, riscv_smax_vl, smaxval>;
+ def : VPatTruncSatClipUMin<vti, wti, uminval>;
+ }
+
+}
+
+foreach vtiToWti = AllWidenableIntVectors in
+ defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>;
+
// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
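
The VPatTruncSatClip patterns added above recognize trunc(umin(x, uminval)) and trunc(smin(smax(x, low), high)) (in either nesting order) and select vnclipu.wi / vnclip.wi with a zero shift amount. The defvars compute the clamp constants from the destination element width: uminval = 2^sew - 1, sminval = 2^(sew-1) - 1 (paired with smin), smaxval = -2^(sew-1) (paired with smax). Worked out as standalone arithmetic:

#include <cstdint>
#include <cstdio>

constexpr int64_t uminBound(unsigned sew) { return (INT64_C(1) << sew) - 1; }        // 2^sew - 1
constexpr int64_t upperSigned(unsigned sew) { return (INT64_C(1) << (sew - 1)) - 1; } // smin clamp
constexpr int64_t lowerSigned(unsigned sew) { return -(INT64_C(1) << (sew - 1)); }    // smax clamp

int main() {
  for (unsigned sew : {8u, 16u, 32u})
    std::printf("sew=%2u  umin=%lld  upper=%lld  lower=%lld\n", sew,
                (long long)uminBound(sew), (long long)upperSigned(sew),
                (long long)lowerSigned(sew));
  // sew=8 prints 255, 127, -128: the clamps of a saturating 16->8 bit
  // truncation, hence the single vnclip[u] replacement.
  return 0;
}
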
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 0b1d5b664df9..31f832dfd84c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -349,20 +349,26 @@ multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type,
: VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>;
}
-multiclass VPseudoSiFiveVQMACC<string Constraint = ""> {
+multiclass VPseudoSiFiveVQMACCDOD<string Constraint = ""> {
foreach m = MxListVF8 in
let VLMul = m.value in
defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>;
}
+multiclass VPseudoSiFiveVQMACCQOQ<string Constraint = ""> {
+ foreach m = [V_MF2, V_M1, V_M2, V_M4] in
+ let VLMul = m.value in
+ defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
+}
+
multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
- foreach m = MxListFW in
+ foreach m = MxListVF2 in
let VLMul = m.value in
defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
}
multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> {
- foreach i = [0, 1, 2, 3, 4] in
+ foreach i = 0-4 in
let hasSideEffects = 0 in
defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass,
MxListVF4[i].vrclass,
@@ -400,17 +406,17 @@ let Predicates = [HasVendorXSfvcp] in {
}
let Predicates = [HasVendorXSfvqmaccdod] in {
- defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACC;
- defm VQMACC_2x8x2 : VPseudoSiFiveVQMACC;
- defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACC;
- defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACC;
+ defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACCDOD;
+ defm VQMACC_2x8x2 : VPseudoSiFiveVQMACCDOD;
+ defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACCDOD;
+ defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACCDOD;
}
let Predicates = [HasVendorXSfvqmaccqoq] in {
- defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACC;
- defm VQMACC_4x8x4 : VPseudoSiFiveVQMACC;
- defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACC;
- defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC;
+ defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACCQOQ;
+ defm VQMACC_4x8x4 : VPseudoSiFiveVQMACCQOQ;
+ defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACCQOQ;
+ defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACCQOQ;
}
let Predicates = [HasVendorXSfvfwmaccqqq] in {
@@ -566,16 +572,25 @@ multiclass VPatVMACC<string intrinsic, string instruction, string kind,
}
}
-defset list<VTypeInfoToWide> VQMACCInfoPairs = {
+defset list<VTypeInfoToWide> VQMACCDODInfoPairs = {
def : VTypeInfoToWide<VI8M1, VI32M1>;
def : VTypeInfoToWide<VI8M2, VI32M2>;
def : VTypeInfoToWide<VI8M4, VI32M4>;
def : VTypeInfoToWide<VI8M8, VI32M8>;
}
-multiclass VPatVQMACC<string intrinsic, string instruction, string kind>
- : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>;
+defset list<VTypeInfoToWide> VQMACCQOQInfoPairs = {
+ def : VTypeInfoToWide<VI8MF2, VI32M1>;
+ def : VTypeInfoToWide<VI8M1, VI32M2>;
+ def : VTypeInfoToWide<VI8M2, VI32M4>;
+ def : VTypeInfoToWide<VI8M4, VI32M8>;
+}
+
+multiclass VPatVQMACCDOD<string intrinsic, string instruction, string kind>
+ : VPatVMACC<intrinsic, instruction, kind, VQMACCDODInfoPairs, vint8m1_t>;
+multiclass VPatVQMACCQOQ<string intrinsic, string instruction, string kind>
+ : VPatVMACC<intrinsic, instruction, kind, VQMACCQOQInfoPairs, vint8m1_t>;
multiclass VPatVFWMACC<string intrinsic, string instruction, string kind>
: VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors,
@@ -637,17 +652,17 @@ let Predicates = [HasVendorXSfvcp] in {
}
let Predicates = [HasVendorXSfvqmaccdod] in {
- defm : VPatVQMACC<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">;
- defm : VPatVQMACC<"vqmacc_2x8x2", "VQMACC", "2x8x2">;
- defm : VPatVQMACC<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">;
- defm : VPatVQMACC<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">;
+ defm : VPatVQMACCDOD<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">;
+ defm : VPatVQMACCDOD<"vqmacc_2x8x2", "VQMACC", "2x8x2">;
+ defm : VPatVQMACCDOD<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">;
+ defm : VPatVQMACCDOD<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">;
}
let Predicates = [HasVendorXSfvqmaccqoq] in {
- defm : VPatVQMACC<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">;
- defm : VPatVQMACC<"vqmacc_4x8x4", "VQMACC", "4x8x4">;
- defm : VPatVQMACC<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">;
- defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">;
+ defm : VPatVQMACCQOQ<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">;
+ defm : VPatVQMACCQOQ<"vqmacc_4x8x4", "VQMACC", "4x8x4">;
+ defm : VPatVQMACCQOQ<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">;
+ defm : VPatVQMACCQOQ<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">;
}
let Predicates = [HasVendorXSfvfwmaccqqq] in {
@@ -658,27 +673,3 @@ let Predicates = [HasVendorXSfvfnrclipxfqf] in {
defm : VPatVFNRCLIP<"vfnrclip_xu_f_qf", "VFNRCLIP_XU_F_QF">;
defm : VPatVFNRCLIP<"vfnrclip_x_f_qf", "VFNRCLIP_X_F_QF">;
}
-
-let Predicates = [HasVendorXSfcie] in {
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in {
-def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">,
- Sched<[]> {
- let rd = 0;
- let imm12 = {0b1111,0b1100,0b0000};
-}
-
-def SF_CDISCARD_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cdiscard.d.l1","$rs1">,
- Sched<[]> {
- let rd = 0;
- let imm12 = {0b1111,0b1100,0b0010};
-}
-
-def SF_CEASE : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "cease","">, Sched<[]> {
- let rs1 = 0;
- let rd = 0;
- let imm12 = {0b0011,0b0000,0b0101};
-}
-}
-def : InstAlias<"cflush.d.l1", (SF_CFLUSH_D_L1 X0)>;
-def : InstAlias<"cdiscard.d.l1", (SF_CDISCARD_D_L1 X0)>;
-} // Predicates = [HasVendorXScie]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index a78f36244468..3506204d6c25 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -56,9 +56,8 @@ def rlist : Operand<OtherVT> {
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- if (!isUInt<4>(Imm)) return false;
// 0~3 Reserved for EABI
- return (Imm >= 4) && (Imm <= 15);
+ return isUInt<4>(Imm) && Imm >= 4;
}];
}
@@ -70,7 +69,7 @@ def spimm : Operand<OtherVT> {
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- return isShiftedUInt<5, 4>(Imm);
+ return isShiftedUInt<2, 4>(Imm);
}];
}
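
Two operand predicates are tightened here: rlist still accepts exactly 4..15 (values 0..3 stay reserved for EABI), now phrased through isUInt<4>, while spimm shrinks from a 5-bit to a 2-bit shifted field, so only the stack-adjustment values 0, 16, 32 and 48 are accepted, matching the 2-bit spimm field of the Zc push/pop encodings. A standalone model of the two checks, with isShiftedUInt re-implemented locally for the sketch:

#include <cstdint>

template <unsigned N> constexpr bool isUIntN(uint64_t x) {
  return N >= 64 || x < (UINT64_C(1) << N);
}
// llvm::isShiftedUInt<N, S>(x): x is a multiple of 2^S and fits in N+S bits.
template <unsigned N, unsigned S> constexpr bool isShiftedUIntNS(uint64_t x) {
  return isUIntN<N + S>(x) && x % (UINT64_C(1) << S) == 0;
}

constexpr bool validRlist(int64_t Imm) {
  return Imm >= 0 && isUIntN<4>((uint64_t)Imm) && Imm >= 4;   // 4..15
}
constexpr bool validSpimm(int64_t Imm) {
  return Imm >= 0 && isShiftedUIntNS<2, 4>((uint64_t)Imm);    // 0,16,32,48
}

static_assert(!validRlist(3) && validRlist(4) && validRlist(15) && !validRlist(16), "");
static_assert(validSpimm(48) && !validSpimm(64) && !validSpimm(8), "");
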
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
new file mode 100644
index 000000000000..6fbfde5ef488
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
@@ -0,0 +1,34 @@
+//===-- RISCVInstrInfoZcmop.td -----------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard Compressed
+// May-Be-Operations Extension (Zcmop).
+// This version is still experimental as the 'Zcmop' extension hasn't been
+// ratified yet. It is based on v0.2 of the specification.
+//
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class CMOPInst<bits<3> imm3, string opcodestr>
+ : RVInst16CI<0b011, 0b01, (outs), (ins), opcodestr, ""> {
+ let Inst{6-2} = 0;
+ let Inst{7} = 1;
+ let Inst{10-8} = imm3;
+ let Inst{12-11} = 0;
+}
+
+// CMOP1, CMOP5 is used by Zicfiss.
+let Predicates = [HasStdExtZcmop, NoHasStdExtZicfiss] in {
+ def CMOP1 : CMOPInst<0, "cmop.1">, Sched<[]>;
+ def CMOP5 : CMOPInst<2, "cmop.5">, Sched<[]>;
+}
+
+foreach n = [3, 7, 9, 11, 13, 15] in {
+ let Predicates = [HasStdExtZcmop] in
+ def CMOP # n : CMOPInst<!srl(n, 1), "cmop." # n>, Sched<[]>;
+}
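
From the CMOPInst layout above, every c.mop.n (n odd) assembles to the same fixed pattern with n[3:1] placed in bits 10-8: funct3 0b011, bit 7 set, C1 quadrant opcode 0b01. A quick standalone encoder, for illustration only:

#include <cstdint>
#include <cstdio>

static uint16_t encodeCMop(unsigned n) {          // n odd, 1..15
  unsigned imm3 = n >> 1;
  return (uint16_t)((0b011 << 13) | (imm3 << 8) | (1u << 7) | 0b01);
}

int main() {
  for (unsigned n = 1; n <= 15; n += 2)
    std::printf("c.mop.%-2u = 0x%04x\n", n, (unsigned)encodeCMop(n));
  // c.mop.1 comes out as 0x6081, c.mop.3 as 0x6181, ..., c.mop.15 as 0x6781.
  return 0;
}
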
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
new file mode 100644
index 000000000000..49a57f86cccd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
@@ -0,0 +1,72 @@
+//===------ RISCVInstrInfoZicfiss.td - RISC-V Zicfiss -*- tablegen -*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+class RVC_SSInst<bits<5> rs1val, RegisterClass reg_class, string opcodestr> :
+ RVInst16<(outs), (ins reg_class:$rs1), opcodestr, "$rs1", [], InstFormatOther> {
+ let Inst{15-13} = 0b011;
+ let Inst{12} = 0;
+ let Inst{11-7} = rs1val;
+ let Inst{6-2} = 0b00000;
+ let Inst{1-0} = 0b01;
+ let DecoderMethod = "decodeCSSPushPopchk";
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicfiss] in {
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def SSPOPCHK : RVInstI<0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs1), "sspopchk",
+ "$rs1"> {
+ let rd = 0;
+ let imm12 = 0b110011011100;
+} // Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0
+
+let Uses = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+def SSRDP : RVInstI<0b100, OPC_SYSTEM, (outs GPRNoX0:$rd), (ins), "ssrdp", "$rd"> {
+ let imm12 = 0b110011011100;
+ let rs1 = 0b00000;
+}
+} // Uses = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def SSPUSH : RVInstR<0b1100111, 0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs2),
+ "sspush", "$rs2"> {
+ let rd = 0b00000;
+ let rs1 = 0b00000;
+}
+} // Predicates = [HasStdExtZicfiss]
+
+let Predicates = [HasStdExtZicfiss, HasStdExtZcmop],
+ DecoderNamespace = "Zicfiss" in {
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def C_SSPUSH : RVC_SSInst<0b00001, GPRX1, "c.sspush">;
+
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def C_SSPOPCHK : RVC_SSInst<0b00101, GPRX5, "c.sspopchk">;
+} // Predicates = [HasStdExtZicfiss, HasStdExtZcmop]
+
+let Predicates = [HasStdExtZicfiss] in
+defm SSAMOSWAP_W : AMO_rr_aq_rl<0b01001, 0b010, "ssamoswap.w">;
+
+let Predicates = [HasStdExtZicfiss, IsRV64] in
+defm SSAMOSWAP_D : AMO_rr_aq_rl<0b01001, 0b011, "ssamoswap.d">;
+
+//===----------------------------------------------------------------------===/
+// Compress Instruction tablegen backend.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicfiss, HasStdExtZcmop] in {
+def : CompressPat<(SSPUSH X1), (C_SSPUSH X1)>;
+def : CompressPat<(SSPOPCHK X5), (C_SSPOPCHK X5)>;
+} // Predicates = [HasStdExtZicfiss, HasStdExtZcmop]
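
Plugging the rs1val constants above into the RVC_SSInst layout (funct3 0b011, rs1val in bits 11-7, opcode 0b01) also explains the NoHasStdExtZicfiss guard on CMOP1/CMOP5 in the Zcmop file: with Zicfiss enabled, those two encodings decode as the shadow-stack instructions instead. A sketch of the overlap, not the actual MC layer:

#include <cstdint>
#include <cstdio>

static uint16_t encodeSS(unsigned rs1val) {
  return (uint16_t)((0b011 << 13) | (rs1val << 7) | 0b01);
}

int main() {
  // Same bit patterns as c.mop.1 and c.mop.5 from RISCVInstrInfoZcmop.td.
  std::printf("c.sspush   x1 = 0x%04x\n", (unsigned)encodeSS(0b00001));
  std::printf("c.sspopchk x5 = 0x%04x\n", (unsigned)encodeSS(0b00101));
  return 0;
}
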
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
new file mode 100644
index 000000000000..1e8c70046c63
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
@@ -0,0 +1,59 @@
+//===-- RISCVInstrInfoZimop.td -----------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard
+// May-Be-Operations Extension (Zimop).
+// This version is still experimental as the 'Zimop' extension hasn't been
+// ratified yet. It is based on v0.1 of the specification.
+//
+//===----------------------------------------------------------------------===//
+
+class RVInstIMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, RISCVOpcode opcode,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31} = imm7{6};
+ let Inst{30} = imm5{4};
+ let Inst{29-28} = imm7{5-4};
+ let Inst{27-26} = imm5{3-2};
+ let Inst{25-22} = imm7{3-0};
+ let Inst{21-20} = imm5{1-0};
+}
+
+class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+ let Inst{31} = imm4{3};
+ let Inst{30} = imm3{2};
+ let Inst{29-28} = imm4{2-1};
+ let Inst{27-26} = imm3{1-0};
+ let Inst{25} = imm4{0};
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
+ RISCVOpcode opcode, string opcodestr>
+ : RVInstIMopr<imm7, imm5, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3,
+ RISCVOpcode opcode, string opcodestr>
+ : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2">;
+
+foreach i = 0...31 in {
+ let Predicates = [HasStdExtZimop] in
+ def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
+ Sched<[]>;
+}
+
+foreach i = 0...7 in {
+ let Predicates = [HasStdExtZimop] in
+ def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
+ Sched<[]>;
+}
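
RVInstIMopr above scatters the two "immediate" fields across bits 31-20 instead of keeping them contiguous. The sketch below reassembles a mop.r.n word from that scatter; the rd/rs1/funct3/opcode placement is assumed to follow the standard I-type layout supplied by RVInstIBase (which is not shown in this diff), so treat the low 20 bits as an assumption:

#include <cstdint>
#include <cstdio>

static uint32_t encodeMopR(unsigned n, unsigned rd, unsigned rs1) {
  const uint32_t imm7 = 0b1000111;           // fixed pattern for mop.r.*
  const uint32_t imm5 = n & 0x1f;            // which of the 32 mop.r ops
  uint32_t inst = 0;
  inst |= ((imm7 >> 6) & 1)  << 31;          // Inst{31}    = imm7{6}
  inst |= ((imm5 >> 4) & 1)  << 30;          // Inst{30}    = imm5{4}
  inst |= ((imm7 >> 4) & 3)  << 28;          // Inst{29-28} = imm7{5-4}
  inst |= ((imm5 >> 2) & 3)  << 26;          // Inst{27-26} = imm5{3-2}
  inst |= (imm7 & 0xf)       << 22;          // Inst{25-22} = imm7{3-0}
  inst |= (imm5 & 3)         << 20;          // Inst{21-20} = imm5{1-0}
  // Assumed I-type base fields: rs1 19-15, funct3 14-12, rd 11-7, SYSTEM opcode.
  inst |= (rs1 & 0x1f) << 15 | 0b100 << 12 | (rd & 0x1f) << 7 | 0x73;
  return inst;
}

int main() {
  std::printf("mop.r.0  a0, a1 = 0x%08x\n", encodeMopR(0, 10, 11));
  std::printf("mop.r.31 a0, a1 = 0x%08x\n", encodeMopR(31, 10, 11));
  return 0;
}
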
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
index 6362a3bef6f2..ba8996e710ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -174,8 +174,7 @@ def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76",
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC,
- FeatureStdExtZihintpause,
- FeatureVendorXSfcie],
+ FeatureStdExtZihintpause],
[TuneSiFive7]>;
def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a3c19115bd31..24f8d600f1ea 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -127,6 +127,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::X27);
}
+ // Shadow stack pointer.
+ markSuperRegs(Reserved, RISCV::SSP);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index c59c9b294d79..840fd149d681 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -137,6 +137,8 @@ def GPR : GPRRegisterClass<(add (sequence "X%u", 10, 17),
(sequence "X%u", 0, 4))>;
def GPRX0 : GPRRegisterClass<(add X0)>;
+def GPRX1 : GPRRegisterClass<(add X1)>;
+def GPRX5 : GPRRegisterClass<(add X5)>;
def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)>;
@@ -165,6 +167,8 @@ def SP : GPRRegisterClass<(add X2)>;
def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9),
(sequence "X%u", 18, 23))>;
+def GPRX1X5 : GPRRegisterClass<(add X1, X5)>;
+
// Floating point registers
let RegAltNameIndices = [ABIRegAltName] in {
def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
@@ -591,3 +595,6 @@ foreach m = LMULList in {
// Special registers
def FFLAGS : RISCVReg<0, "fflags">;
def FRM : RISCVReg<0, "frm">;
+
+// Shadow Stack register
+def SSP : RISCVReg<0, "ssp">;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index 953df7b15e2f..43475e825b46 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -19,9 +19,11 @@ include "llvm/TableGen/SearchableTable.td"
class SysReg<string name, bits<12> op> {
string Name = name;
- // A maximum of one deprecated name is supported right now. It generates a
- // diagnostic when the name is used to encourage software to migrate away from
- // the name.
+ // A maximum of one alias is supported right now.
+ string AltName = name;
+ // A maximum of one deprecated name is supported right now. Unlike the
+ // `AltName` alias, a `DeprecatedName` generates a diagnostic when the name is
+ // used to encourage software to migrate away from the name.
string DeprecatedName = "";
bits<12> Encoding = op;
// FIXME: add these additional fields when needed.
@@ -41,7 +43,7 @@ def SysRegsList : GenericTable {
let FilterClass = "SysReg";
// FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
let Fields = [
- "Name", "DeprecatedName", "Encoding", "FeaturesRequired",
+ "Name", "AltName", "DeprecatedName", "Encoding", "FeaturesRequired",
"isRV32Only",
];
@@ -54,32 +56,13 @@ def lookupSysRegByName : SearchIndex {
let Key = [ "Name" ];
}
-def lookupSysRegByDeprecatedName : SearchIndex {
+def lookupSysRegByAltName : SearchIndex {
let Table = SysRegsList;
- let Key = [ "DeprecatedName" ];
-}
-
-class SiFiveReg<string name, bits<12> op> : SysReg<name, op>;
-
-def SiFiveRegsList : GenericTable {
- let FilterClass = "SiFiveReg";
- // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
- let Fields = [
- "Name", "DeprecatedName", "Encoding", "FeaturesRequired",
- "isRV32Only",
- ];
-
- let PrimaryKey = [ "Encoding" ];
- let PrimaryKeyName = "lookupSiFiveRegByEncoding";
+ let Key = [ "AltName" ];
}
-def lookupSiFiveRegByName : SearchIndex {
- let Table = SiFiveRegsList;
- let Key = [ "Name" ];
-}
-
-def lookupSiFiveRegByDeprecatedName : SearchIndex {
- let Table = SiFiveRegsList;
+def lookupSysRegByDeprecatedName : SearchIndex {
+ let Table = SysRegsList;
let Key = [ "DeprecatedName" ];
}
@@ -309,7 +292,7 @@ foreach i = 3...31 in
//===----------------------------------------------------------------------===//
// Machine Counter Setup
//===----------------------------------------------------------------------===//
-let DeprecatedName = "mucounteren" in // Privileged spec v1.9.1 Name
+let AltName = "mucounteren" in // Privileged spec v1.9.1 Name
def : SysReg<"mcountinhibit", 0x320>;
// mhpmevent3-mhpmevent31 at 0x323-0x33F.
@@ -323,20 +306,6 @@ foreach i = 3...31 in {
}
//===----------------------------------------------------------------------===//
-// SiFive Custom Machine Mode Registers
-//===----------------------------------------------------------------------===//
-
-let FeaturesRequired = [{ {RISCV::FeatureVendorXSfcie} }] in {
-def : SiFiveReg<"mnscratch", 0x350>;
-def : SiFiveReg<"mnepc", 0x351>;
-def : SiFiveReg<"mncause", 0x352>;
-def : SiFiveReg<"mnstatus", 0x353>;
-def : SiFiveReg<"mbpm", 0x7C0>;
-def : SiFiveReg<"mfd", 0x7C1>;
-def : SiFiveReg<"mpd", 0x7C8>;
-}
-
-//===----------------------------------------------------------------------===//
// Debug/ Trace Registers (shared with Debug Mode)
//===----------------------------------------------------------------------===//
def : SysReg<"tselect", 0x7A0>;
@@ -353,7 +322,7 @@ def : SysReg<"dpc", 0x7B1>;
// "dscratch" is an alternative name for "dscratch0" which appeared in earlier
// drafts of the RISC-V debug spec
-let DeprecatedName = "dscratch" in
+let AltName = "dscratch" in
def : SysReg<"dscratch0", 0x7B2>;
def : SysReg<"dscratch1", 0x7B3>;
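The net effect of the new AltName field and lookupSysRegByAltName index is a three-way name resolution: canonical name, silent alias, or deprecated name with a diagnostic. A hedged C++ sketch of that policy, assuming the generated lookup helpers and a placeholder warning callback (neither signature is taken from this patch):

// Hypothetical resolution order over the generated SysRegsList indexes.
const SysReg *resolveSysReg(llvm::StringRef Name,
                            llvm::function_ref<void(llvm::StringRef)> WarnDeprecated) {
  if (const SysReg *R = lookupSysRegByName(Name))
    return R;                 // canonical spelling
  if (const SysReg *R = lookupSysRegByAltName(Name))
    return R;                 // alias such as "mucounteren": accepted silently
  if (const SysReg *R = lookupSysRegByDeprecatedName(Name)) {
    WarnDeprecated(Name);     // deprecated spelling: accepted but diagnosed
    return R;
  }
  return nullptr;
}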
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 96ecc771863e..4c955744b37d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -359,7 +359,8 @@ public:
const TargetTransformInfo::LSRCost &C2);
bool shouldFoldTerminatingConditionAfterLSR() const {
- return true;
+ // FIXME: Enabling this causes miscompiles.
+ return false;
}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 3a34a0bfae46..6c009b9e8dde 100644
--- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -959,8 +959,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(
// N is the number of elements of the vector.
Type *Ty;
- if (TypeStr.starts_with("atomic_"))
- TypeStr = TypeStr.substr(strlen("atomic_"));
+ TypeStr.consume_front("atomic_");
if (TypeStr.starts_with("void")) {
Ty = Type::getVoidTy(Ctx);
@@ -1007,8 +1006,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(
// Handle "typeN*" or "type vector[N]*".
bool IsPtrToVec = TypeStr.consume_back("*");
- if (TypeStr.starts_with(" vector[")) {
- TypeStr = TypeStr.substr(strlen(" vector["));
+ if (TypeStr.consume_front(" vector[")) {
TypeStr = TypeStr.substr(0, TypeStr.find(']'));
}
TypeStr.getAsInteger(10, VecElts);
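Both hunks above rely on StringRef::consume_front/consume_back stripping the affix and reporting whether it was present, which is exactly what the removed starts_with + substr pairs did. A self-contained illustration of the API (standard LLVM StringRef, nothing assumed):

#include "llvm/ADT/StringRef.h"

// consume_front only modifies the string when the prefix matches, and its
// bool result replaces the separate starts_with() test.
static bool stripAtomicPrefix(llvm::StringRef &S) {
  return S.consume_front("atomic_");
}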
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 4f0801479211..78bdf3ae9a84 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2050,7 +2050,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC_V9))) &&
isOneConstant(LHS.getOperand(0)) && isNullConstant(LHS.getOperand(1))) {
SDValue CMPCC = LHS.getOperand(3);
- SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ SPCC = LHS.getConstantOperandVal(2);
LHS = CMPCC.getOperand(0);
RHS = CMPCC.getOperand(1);
}
@@ -3186,7 +3186,7 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
SDValue SparcTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
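This hunk and the SystemZ, VE, and X86 hunks that follow all apply the same mechanical cleanup: SDValue::getConstantOperandVal(i) is shorthand for casting operand i to ConstantSDNode and taking its zero-extended value. A small equivalence sketch:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustration only: both expressions read operand 0 as an unsigned constant
// and assert if that operand is not a ConstantSDNode.
static uint64_t readConstOperand0(SDValue Op) {
  uint64_t Legacy = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  uint64_t Short = Op.getConstantOperandVal(0);
  assert(Legacy == Short);
  return Short;
}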
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 559f2ca476d7..045c4c0aac07 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2186,7 +2186,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
unsigned &CCValid) {
- unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Id = Op.getConstantOperandVal(1);
switch (Id) {
case Intrinsic::s390_tbegin:
Opcode = SystemZISD::TBEGIN;
@@ -2212,7 +2212,7 @@ static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
// CC value as its final argument. Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
- unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Id = Op.getConstantOperandVal(0);
switch (Id) {
case Intrinsic::s390_vpkshs:
case Intrinsic::s390_vpksfs:
@@ -2600,10 +2600,9 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
return true;
if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
return true;
- if (C.ICmpType != SystemZICMP::SignedOnly &&
- Opcode0 == ISD::AND &&
+ if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
+ C.Op0.getConstantOperandVal(1) == 0xffffffff)
return true;
return false;
@@ -3429,11 +3428,9 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
return (Neg.getOpcode() == ISD::SUB &&
Neg.getOperand(0).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
- Neg.getOperand(1) == Pos &&
- (Pos == CmpOp ||
- (Pos.getOpcode() == ISD::SIGN_EXTEND &&
- Pos.getOperand(0) == CmpOp)));
+ Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
+ (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
+ Pos.getOperand(0) == CmpOp)));
}
// Return the absolute or negative absolute of Op; IsNegative decides which.
@@ -3740,7 +3737,7 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
MFI.setFrameAddressIsTaken(true);
SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// By definition, the frame address is the address of the back chain. (In
@@ -3776,7 +3773,7 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
return SDValue();
SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
@@ -4226,7 +4223,7 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
if (HighOp.getOpcode() == ISD::AND &&
HighOp.getOperand(1).getOpcode() == ISD::Constant) {
SDValue HighOp0 = HighOp.getOperand(0);
- uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
+ uint64_t Mask = HighOp.getConstantOperandVal(1);
if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
HighOp = HighOp0;
}
@@ -4485,10 +4482,10 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
- SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
- cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+ AtomicOrdering FenceOrdering =
+ static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
+ SyncScope::ID FenceSSID =
+ static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
@@ -4773,13 +4770,13 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
SelectionDAG &DAG) const {
- bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ bool IsData = Op.getConstantOperandVal(4);
if (!IsData)
// Just preserve the chain.
return Op.getOperand(0);
SDLoc DL(Op);
- bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool IsWrite = Op.getConstantOperandVal(2);
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
@@ -4825,7 +4822,7 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
}
- unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Id = Op.getConstantOperandVal(0);
switch (Id) {
case Intrinsic::thread_pointer:
return lowerThreadPointer(SDLoc(Op), DAG);
@@ -5628,7 +5625,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
Op = Op.getOperand(0);
if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op.getOperand(1).getOpcode() == ISD::Constant) {
- unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Elem = Op.getConstantOperandVal(1);
if (!GS.add(Op.getOperand(0), Elem))
return SDValue();
FoundOne = true;
@@ -6727,8 +6724,7 @@ SDValue SystemZTargetLowering::combineLOAD(
int Index = 1;
if (User->getOpcode() == ISD::SRL &&
User->getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(User->getOperand(1))->getZExtValue() == 64 &&
- User->hasOneUse()) {
+ User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
User = *User->use_begin();
Index = 0;
}
@@ -6857,7 +6853,7 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
std::swap(Op0, Op1);
if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
Op1.getOperand(1).getOpcode() != ISD::Constant ||
- cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue() != 64)
+ Op1.getConstantOperandVal(1) != 64)
return false;
Op1 = Op1.getOperand(0);
@@ -7149,20 +7145,18 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
SDValue Op0 = N->getOperand(OpNo);
- if (N->getValueType(0) == MVT::f32 &&
- Op0.hasOneUse() &&
+ if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getValueType() == MVT::v2f64 &&
Op0.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ Op0.getConstantOperandVal(1) == 0) {
SDValue Vec = Op0.getOperand(0);
for (auto *U : Vec->uses()) {
- if (U != Op0.getNode() &&
- U->hasOneUse() &&
+ if (U != Op0.getNode() && U->hasOneUse() &&
U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
U->getOperand(0) == Vec &&
U->getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+ U->getConstantOperandVal(1) == 1) {
SDValue OtherRound = SDValue(*U->use_begin(), 0);
if (OtherRound.getOpcode() == N->getOpcode() &&
OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
@@ -7215,20 +7209,18 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
SDValue Op0 = N->getOperand(OpNo);
- if (N->getValueType(0) == MVT::f64 &&
- Op0.hasOneUse() &&
+ if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getValueType() == MVT::v4f32 &&
Op0.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ Op0.getConstantOperandVal(1) == 0) {
SDValue Vec = Op0.getOperand(0);
for (auto *U : Vec->uses()) {
- if (U != Op0.getNode() &&
- U->hasOneUse() &&
+ if (U != Op0.getNode() && U->hasOneUse() &&
U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
U->getOperand(0) == Vec &&
U->getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
+ U->getConstantOperandVal(1) == 2) {
SDValue OtherExtend = SDValue(*U->use_begin(), 0);
if (OtherExtend.getOpcode() == N->getOpcode() &&
OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
@@ -7605,7 +7597,7 @@ SDValue SystemZTargetLowering::combineINTRINSIC(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Id = N->getConstantOperandVal(1);
switch (Id) {
// VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
// or larger is simply a vector load.
@@ -7679,7 +7671,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
APInt SrcDemE;
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
- unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Id = Op.getConstantOperandVal(0);
switch (Id) {
case Intrinsic::s390_vpksh: // PACKS
case Intrinsic::s390_vpksf:
@@ -7723,7 +7715,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
SrcDemE = APInt(NumElts, 0);
if (!DemandedElts[OpNo - 1])
break;
- unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Mask = Op.getConstantOperandVal(3);
unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
// Demand input element 0 or 1, given by the mask bit value.
SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
@@ -7732,7 +7724,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
case Intrinsic::s390_vsldb: {
// VECTOR SHIFT LEFT DOUBLE BY BYTE
assert(VT == MVT::v16i8 && "Unexpected type.");
- unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned FirstIdx = Op.getConstantOperandVal(3);
assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
unsigned NumSrc0Els = 16 - FirstIdx;
SrcDemE = APInt(NumElts, 0);
@@ -7808,7 +7800,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
bool IsLogical = false;
- unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Id = Op.getConstantOperandVal(0);
switch (Id) {
case Intrinsic::s390_vpksh: // PACKS
case Intrinsic::s390_vpksf:
@@ -7908,7 +7900,7 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
- unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Id = Op.getConstantOperandVal(0);
switch (Id) {
case Intrinsic::s390_vpksh: // PACKS
case Intrinsic::s390_vpksf:
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
index af6cf340f8a3..d98bb886c185 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -507,11 +507,11 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs),
// Signed and unsigned comparisons.
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
- unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Type = N->getConstantOperandVal(2);
return Type != SystemZICMP::UnsignedOnly;
}]>;
def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
- unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Type = N->getConstantOperandVal(2);
return Type != SystemZICMP::SignedOnly;
}]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
index 0267aefd1e91..0e41a2d7aa03 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1101,10 +1101,10 @@ Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
- SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
- cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+ AtomicOrdering FenceOrdering =
+ static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
+ SyncScope::ID FenceSSID =
+ static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
// VE uses Release consistency, so need a fence instruction if it is a
// cross-thread fence.
@@ -1766,7 +1766,7 @@ static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
switch (IntNo) {
default: // Don't custom lower most intrinsics.
return SDValue();
@@ -2937,8 +2937,8 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) {
if (User->getOperand(1).getNode() != N &&
User->getOperand(2).getNode() != N &&
isa<ConstantSDNode>(User->getOperand(3))) {
- VECC::CondCode VECCVal = static_cast<VECC::CondCode>(
- cast<ConstantSDNode>(User->getOperand(3))->getZExtValue());
+ VECC::CondCode VECCVal =
+ static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
return isIntVECondCode(VECCVal);
}
[[fallthrough]];
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index bc5f562d9589..051f6caa8c04 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -108,6 +108,8 @@ class X86AsmParser : public MCTargetAsmParser {
// Does this instruction use apx extended register?
bool UseApxExtendedReg = false;
+ // Is this instruction explicitly required not to update flags?
+ bool ForcedNoFlag = false;
private:
SMLoc consumeToken() {
@@ -2312,8 +2314,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
// Drop the optional '.'.
StringRef DotDispStr = Tok.getString();
- if (DotDispStr.starts_with("."))
- DotDispStr = DotDispStr.drop_front(1);
+ DotDispStr.consume_front(".");
StringRef TrailingDot;
// .Imm gets lexed as a real.
@@ -3126,6 +3127,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
ForcedVEXEncoding = VEXEncoding_Default;
ForcedDispEncoding = DispEncoding_Default;
UseApxExtendedReg = false;
+ ForcedNoFlag = false;
// Parse pseudo prefixes.
while (true) {
@@ -3150,6 +3152,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
ForcedDispEncoding = DispEncoding_Disp8;
else if (Prefix == "disp32")
ForcedDispEncoding = DispEncoding_Disp32;
+ else if (Prefix == "nf")
+ ForcedNoFlag = true;
else
return Error(NameLoc, "unknown prefix");
@@ -3997,6 +4001,8 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
return Match_Unsupported;
+ if (ForcedNoFlag != !!(MCID.TSFlags & X86II::EVEX_NF))
+ return Match_Unsupported;
if (ForcedVEXEncoding == VEXEncoding_EVEX &&
(MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
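The predicate added above simply requires the {nf} pseudo prefix and the candidate opcode's EVEX_NF flag to agree, with !! normalizing the masked TSFlags bit to bool before the comparison. In isolation, a hedged restatement of that check:

// Sketch of the added match rule: an {nf}-prefixed mnemonic may only match
// an EVEX_NF-flagged encoding, and vice versa.
static bool noFlagRequestMatches(bool ForcedNoFlag, uint64_t TSFlags) {
  return ForcedNoFlag == !!(llvm::X86II::EVEX_NF & TSFlags);
}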
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 59e2008f5632..347dc0d4ed43 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1169,7 +1169,11 @@ static int getInstructionID(struct InternalInstruction *insn,
attrMask |= ATTR_EVEXKZ;
if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXB;
- if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
+ // nf bit is the MSB of aaa
+ if (nfFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
+ insn->opcodeType == MAP4)
+ attrMask |= ATTR_EVEXNF;
+ else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXK;
if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_VEXL;
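nfFromEVEX4of4, introduced in the header change just below, reads bit 2 of the fourth EVEX payload byte, i.e. the most significant bit of the 3-bit aaa field; that is why the MAP4 case claims the bit as ATTR_EVEXNF before the generic aaa test can treat it as an opmask. A standalone bit-extraction sketch:

// Hedged sketch of the two overlapping readings of EVEX byte 4.
static unsigned nfBit(uint8_t EvexByte4) { return (EvexByte4 >> 2) & 0x1; } // MSB of aaa
static unsigned aaaBits(uint8_t EvexByte4) { return EvexByte4 & 0x7; }      // opmask field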
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index decc45091941..4c7b1c094522 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -103,6 +103,7 @@ namespace X86Disassembler {
#define bFromEVEX4of4(evex) bitFromOffset4(evex)
#define v2FromEVEX4of4(evex) invertedBitFromOffset3(evex)
#define aaaFromEVEX4of4(evex) threeBitsFromOffset0(evex)
+#define nfFromEVEX4of4(evex) bitFromOffset2(evex)
// These enums represent Intel registers for use by the decoder.
#define REGS_8BIT \
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index b0fcaef5f4b0..e006dd877360 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -870,7 +870,10 @@ enum : uint64_t {
ExplicitVEXPrefix = 2ULL << ExplicitOpPrefixShift,
/// For instructions that are promoted to EVEX space for EGPR.
ExplicitEVEXPrefix = 3ULL << ExplicitOpPrefixShift,
- ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift
+ ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift,
+ /// EVEX_NF - Set if this instruction has EVEX.NF field set.
+ EVEX_NFShift = ExplicitOpPrefixShift + 2,
+ EVEX_NF = 1ULL << EVEX_NFShift
};
/// \returns true if the instruction with given opcode is a prefix.
@@ -992,6 +995,12 @@ inline unsigned getOperandBias(const MCInstrDesc &Desc) {
}
}
+/// \returns true if the instruction has a NDD (new data destination).
+inline bool hasNewDataDest(uint64_t TSFlags) {
+ return (TSFlags & X86II::OpMapMask) == X86II::T_MAP4 &&
+ (TSFlags & X86II::EVEX_B) && (TSFlags & X86II::VEX_4V);
+}
+
/// \returns operand # for the first field of the memory operand or -1 if no
/// memory operands.
/// NOTE: This ignores tied operands. If there is a tied register which is
@@ -1018,7 +1027,7 @@ inline int getMemoryOperandNo(uint64_t TSFlags) {
return -1;
case X86II::MRMDestMem:
case X86II::MRMDestMemFSIB:
- return 0;
+ return hasNewDataDest(TSFlags);
case X86II::MRMSrcMem:
case X86II::MRMSrcMemFSIB:
// Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
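With the change above, MRMDestMem forms no longer return a fixed 0: for an APX new-data-destination instruction the extra destination register sits at operand 0, so the memory operand moves to index 1, and the bool-to-int conversion of hasNewDataDest encodes exactly that. Roughly:

// Sketch of the MRMDestMem case of getMemoryOperandNo() after this change.
static int memOperandNoForMRMDestMem(uint64_t TSFlags) {
  return llvm::X86II::hasNewDataDest(TSFlags) ? 1 : 0; // NDD pushes memory to slot 1
}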
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index cab2f0a2e1c1..1947313a9dfb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -369,6 +369,9 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
else if (Flags & X86::IP_HAS_REPEAT)
O << "\trep\t";
+ if (TSFlags & X86II::EVEX_NF)
+ O << "\t{nf}";
+
// These all require a pseudo prefix
if ((Flags & X86::IP_USE_VEX) ||
(TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix)
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 9e1f1eb97e70..924956295e7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -251,6 +251,7 @@ public:
void setAAA(const MCInst &MI, unsigned OpNum) {
EVEX_aaa = getRegEncoding(MI, OpNum);
}
+ void setNF(bool V) { EVEX_aaa |= V << 2; }
X86OpcodePrefixHelper(const MCRegisterInfo &MRI)
: W(0), R(0), X(0), B(0), M(0), R2(0), X2(0), B2(0), VEX_4V(0), VEX_L(0),
@@ -987,9 +988,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
}
Prefix.setW(TSFlags & X86II::REX_W);
+ Prefix.setNF(TSFlags & X86II::EVEX_NF);
bool HasEVEX_K = TSFlags & X86II::EVEX_K;
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool IsND = X86II::hasNewDataDest(TSFlags); // IsND implies HasVEX_4V
bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
switch (TSFlags & X86II::OpMapMask) {
@@ -1049,6 +1052,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
bool EncodeRC = false;
uint8_t EVEX_rc = 0;
+
unsigned CurOp = X86II::getOperandBias(Desc);
switch (TSFlags & X86II::FormMask) {
@@ -1073,16 +1077,21 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// MemAddr, src1(VEX_4V), src2(ModR/M)
// MemAddr, src1(ModR/M), imm8
//
+ // NDD:
+ // dst(VEX_4V), MemAddr, src1(ModR/M)
Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
Prefix.setV2(MI, MemOperand + X86::AddrIndexReg, HasVEX_4V);
+ if (IsND)
+ Prefix.set4VV2(MI, CurOp++);
+
CurOp += X86::AddrNumOperands;
if (HasEVEX_K)
Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
Prefix.set4VV2(MI, CurOp++);
Prefix.setRR2(MI, CurOp++);
@@ -1098,12 +1107,18 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
//
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(Imm[7:4])
+ //
+ // NDD:
+ // dst(VEX_4V), src1(ModR/M), MemAddr
+ if (IsND)
+ Prefix.set4VV2(MI, CurOp++);
+
Prefix.setRR2(MI, CurOp++);
if (HasEVEX_K)
Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
Prefix.set4VV2(MI, CurOp++);
Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
@@ -1160,12 +1175,17 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
//
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
+ //
+ // NDD:
+ // dst(VEX_4V), src1(ModR/M.reg), src2(ModR/M)
+ if (IsND)
+ Prefix.set4VV2(MI, CurOp++);
Prefix.setRR2(MI, CurOp++);
if (HasEVEX_K)
Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
Prefix.set4VV2(MI, CurOp++);
Prefix.setBB2(MI, CurOp);
@@ -1209,6 +1229,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ //
+ // NDD:
+ // dst(VEX_4V), src1(ModR/M), src2(ModR/M)
+ if (IsND)
+ Prefix.set4VV2(MI, CurOp++);
Prefix.setBB2(MI, CurOp);
Prefix.setX(MI, CurOp, 4);
++CurOp;
@@ -1216,7 +1241,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
if (HasEVEX_K)
Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
Prefix.set4VV2(MI, CurOp++);
Prefix.setRR2(MI, CurOp++);
@@ -1508,6 +1533,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI,
unsigned OpcodeOffset = 0;
+ bool IsND = X86II::hasNewDataDest(TSFlags);
+
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
default:
@@ -1576,6 +1603,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
+ if (IsND) // Skip the NDD operand encoded in EVEX_VVVV
+ ++CurOp;
emitRegModRMByte(MI.getOperand(CurOp),
getX86RegNum(MI.getOperand(SrcRegNum)), CB);
@@ -1602,6 +1631,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
+ if (IsND) // Skip new data destination
+ ++CurOp;
+
bool ForceSIB = (Form == X86II::MRMDestMemFSIB);
emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
Kind, StartByte, CB, Fixups, STI, ForceSIB);
@@ -1669,6 +1701,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI,
case X86II::MRMSrcMem: {
unsigned FirstMemOp = CurOp + 1;
+ if (IsND) // Skip new data destination
+ CurOp++;
+
if (HasEVEX_K) // Skip writemask
++FirstMemOp;
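setNF above folds the NF flag into bit 2 of the stored EVEX_aaa value, mirroring the decoder change earlier in this patch; NF can reuse that bit because MAP4 instructions do not take an opmask register. A compact hedged sketch of the packing (the struct paraphrases the prefix helper, it is not its real layout):

// Sketch: aaa holds the 3-bit opmask encoding, and setNF() ORs NF into its MSB.
struct EvexAaaSketch {
  uint8_t Value = 0;
  void setAAA(unsigned MaskRegEnc) { Value = MaskRegEnc & 0x7; }
  void setNF(bool V) { Value |= static_cast<uint8_t>(V) << 2; }
};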
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index 5fd6828f4312..e89ddcc570c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -1256,11 +1256,6 @@ def ProcessorFeatures {
list<SubtargetFeature> SRFFeatures =
!listconcat(ADLFeatures, SRFAdditionalFeatures);
- // Grandridge
- list<SubtargetFeature> GRRAdditionalFeatures = [FeatureRAOINT];
- list<SubtargetFeature> GRRFeatures =
- !listconcat(SRFFeatures, GRRAdditionalFeatures);
-
// Arrowlake S
list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
FeatureSHA512,
@@ -1706,10 +1701,10 @@ foreach P = ["goldmont_plus", "goldmont-plus"] in {
}
def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
ProcessorFeatures.TRMTuning>;
-def : ProcModel<"sierraforest", AlderlakePModel, ProcessorFeatures.SRFFeatures,
- ProcessorFeatures.TRMTuning>;
-def : ProcModel<"grandridge", AlderlakePModel, ProcessorFeatures.GRRFeatures,
+foreach P = ["sierraforest", "grandridge"] in {
+ def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
ProcessorFeatures.TRMTuning>;
+}
// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 77a997588c4f..73b10cf3067e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -487,7 +487,7 @@ namespace {
// from PatFrags in tablegen.
bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
- const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ const APInt &Val = N->getConstantOperandAPInt(1);
if (Val.countr_one() >= Width)
return true;
@@ -5233,7 +5233,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
case X86ISD::VPTERNLOG: {
- uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
+ uint8_t Imm = Node->getConstantOperandVal(3);
if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2), Imm))
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63bdf24d6b4f..1e4b1361f98a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2267,6 +2267,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom);
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
@@ -2282,6 +2284,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32bf16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32bf16, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
@@ -3737,9 +3741,11 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
// type. This ensures they get CSE'd. But if the integer type is not
// available, use a floating-point +0.0 instead.
SDValue Vec;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
- } else if (VT.isFloatingPoint()) {
+ } else if (VT.isFloatingPoint() &&
+ TLI.isTypeLegal(VT.getVectorElementType())) {
Vec = DAG.getConstantFP(+0.0, dl, VT);
} else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
@@ -31752,7 +31758,7 @@ static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned IsData = Op.getConstantOperandVal(4);
// We don't support non-data prefetch without PREFETCHI.
// Just preserve the chain.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 6c23928228d2..9aa70dff5f93 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -135,8 +135,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
int64_t D = static_cast<int64_t>(S_V.second);
unsigned IID = 0;
for (const auto &HintType : HintTypes) {
- if (Name.starts_with(HintType.first)) {
- Name = Name.drop_front(HintType.first.size());
+ if (Name.consume_front(HintType.first)) {
IID = HintType.second;
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
index 7f3e193d9a1b..c47bee070e04 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
@@ -14,35 +14,45 @@
//===----------------------------------------------------------------------===//
// AMX instructions
-let Predicates = [HasAMXTILE, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- let hasSideEffects = 1,
- Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
- def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
- "ldtilecfg\t$src",
- [(int_x86_ldtilecfg addr:$src)]>, VEX, T8;
- let hasSideEffects = 1 in
- def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
- "sttilecfg\t$src",
- [(int_x86_sttilecfg addr:$src)]>, VEX, T8, PD;
- let mayLoad = 1 in
- def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
- (ins sibmem:$src),
- "tileloadd\t{$src, $dst|$dst, $src}", []>,
- VEX, T8, XD;
- let mayLoad = 1 in
- def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
- (ins sibmem:$src),
- "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
- VEX, T8, PD;
+multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
+let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
+ let hasSideEffects = 1,
+ Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+ def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
+ "ldtilecfg\t$src",
+ [(int_x86_ldtilecfg addr:$src)]>,
+ T8, PS;
+ let hasSideEffects = 1 in
+ def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
+ "sttilecfg\t$src",
+ [(int_x86_sttilecfg addr:$src)]>,
+ T8, PD;
+ let mayLoad = 1 in
+ def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src),
+ "tileloadd\t{$src, $dst|$dst, $src}", []>,
+ T8, XD;
+ let mayLoad = 1 in
+ def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src),
+ "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
+ T8, PD;
+ let mayStore = 1 in
+ def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
+ (ins sibmem:$dst, TILE:$src),
+ "tilestored\t{$src, $dst|$dst, $src}", []>,
+ T8, XS;
+}
+}
+
+let SchedRW = [WriteSystem] in {
+ defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
+ defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;
+
+ let Predicates = [HasAMXTILE, In64BitMode] in {
let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
- "tilerelease", [(int_x86_tilerelease)]>, VEX, T8;
- let mayStore = 1 in
- def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
- (ins sibmem:$dst, TILE:$src),
- "tilestored\t{$src, $dst|$dst, $src}", []>,
- VEX, T8, XS;
+ "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
"tilezero\t$dst", []>,
VEX, T8, XD;
@@ -82,8 +92,8 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
[(int_x86_tilezero timm:$src)]>;
}
- } // SchedRW
-} // HasAMXTILE
+ } // Predicates
+} // SchedRW
let Predicates = [HasAMXINT8, In64BitMode] in {
let SchedRW = [WriteSystem] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index 7c3c1d5fe42b..c3a673f97d34 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1447,6 +1447,17 @@ def : Pat<(vselect_mask VK8WM:$mask,
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}
+let Predicates = [HasBF16] in {
+ def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
+ (VBROADCASTF64X4rm addr:$src)>;
+ def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4rm addr:$src)>;
+}
+
+let Predicates = [HasBF16, HasVLX] in
+ def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4Z256rm addr:$src)>;
+
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
index 936db48bb9df..6b0c1b8c28c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -44,591 +44,298 @@ def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>;
def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>;
}
-// BinOpRR - Instructions that read "reg, reg".
-class BinOpRR<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p>
- : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m,
- binop_args, p>, Sched<[WriteALU]>;
-// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only.
-class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
- : BinOpRR<o, m, t, (outs),
- [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>,
- DefEFLAGS;
-// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F
-class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t>
- : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly {
- let Form = MRMSrcReg;
-}
-// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS.
-class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
- : BinOpRR<o, m, t, (outs t.RegClass:$dst),
- [(set t.RegClass:$dst, EFLAGS,
- (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS;
-// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF.
-class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t>
- : BinOpRR_RF<o, m, t, null_frag>, DisassembleOnly {
- let Form = MRMSrcReg;
-}
-// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write
-// EFLAGS.
-class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
- : BinOpRR<o, m, t, (outs t.RegClass:$dst),
- [(set t.RegClass:$dst, EFLAGS,
- (node t.RegClass:$src1, t.RegClass:$src2,
- EFLAGS))]>, DefEFLAGS, UseEFLAGS {
- let SchedRW = [WriteADC];
-}
-// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF
-class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t>
- : BinOpRRF_RF<o, m, t, null_frag>, DisassembleOnly {
- let Form = MRMSrcReg;
-}
-
-// BinOpRM - Instructions that read "reg, [mem]".
-class BinOpRM<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p>
- : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m,
- binop_args, p>,
- Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> {
- let mayLoad = 1;
-}
-// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only.
-class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node>
- : BinOpRM<o, m, t, (outs),
- [(set EFLAGS, (node t.RegClass:$src1,
- (t.LoadNode addr:$src2)))]>, DefEFLAGS;
-// BinOpRM_RF - Instructions that read "reg, reg", and write "reg", EFLAGS.
-class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
- : BinOpRM<o, m, t, (outs t.RegClass:$dst),
- [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1,
- (t.LoadNode addr:$src2)))]>, DefEFLAGS;
-// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write
-// EFLAGS.
-class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
- : BinOpRM<o, m, t, (outs t.RegClass:$dst),
- [(set t.RegClass:$dst, EFLAGS,
- (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>,
- DefEFLAGS, UseEFLAGS {
- let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
- // base, scale, index, offset, segment.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // implicit register read.
- WriteADC.ReadAfterFold];
-}
-
-// BinOpRI - Instructions that read "reg, imm".
-class BinOpRI<bits<8> o, string m, X86TypeInfo t, Format f, dag out, list<dag> p>
- : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m,
- binop_args, p>, Sched<[WriteALU]> {
- let ImmT = t.ImmEncoding;
-}
-// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only.
-class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
- Format f>
- : BinOpRI<o, m, t, f, (outs),
- [(set EFLAGS, (node t.RegClass:$src1,
- t.ImmOperator:$src2))]>, DefEFLAGS;
-// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS.
-class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
- : BinOpRI<o, m, t, f, (outs t.RegClass:$dst),
- [(set t.RegClass:$dst, EFLAGS,
- (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS;
-// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write
-// EFLAGS.
-class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
- : BinOpRI<o, m, t, f, (outs t.RegClass:$dst),
- [(set t.RegClass:$dst, EFLAGS,
- (node t.RegClass:$src1, t.ImmOperator:$src2,
- EFLAGS))]>, DefEFLAGS, UseEFLAGS {
- let SchedRW = [WriteADC];
-}
-// BinOpRI8 - Instructions that read "reg, imm8".
-class BinOpRI8<bits<8> o, string m, X86TypeInfo t, Format f, dag out>
- : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m,
- binop_args, []>, Sched<[WriteALU]> {
- let ImmT = Imm8;
-}
-// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only.
-class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f>
- : BinOpRI8<o, m, t, f, (outs)>, DefEFLAGS;
-// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS.
-class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f>
- : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS;
-// BinOpRI8F_RF - Instructions that read "reg, imm", write "reg" and read/write
-// EFLAGS.
-class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f>
- : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS {
- let SchedRW = [WriteADC];
-}
-
-// BinOpMR - Instructions that read "[mem], reg".
-class BinOpMR<bits<8> o, string m, X86TypeInfo t, list<dag> p>
- : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2), m,
- binop_args, p> {
- let mayLoad = 1;
-}
-// BinOpMR_F - Instructions that read "[mem], imm8" and write EFLAGS only.
-class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
- : BinOpMR<o, m, t,
- [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>,
- Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS;
-// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS.
-class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDNode node>
- : BinOpMR<o, m, t,
- [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1),
- (implicit EFLAGS)]>,
- Sched<[WriteALURMW,
- // base, scale, index, offset, segment
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- WriteALU.ReadAfterFold]>, // reg
- DefEFLAGS {
- let mayStore = 1;
-}
-// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and
-// read/write EFLAGS.
-class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node>
- : BinOpMR<o, m, t,
- [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS),
- addr:$src1), (implicit EFLAGS)]>,
- Sched<[WriteADCRMW,
- // base, scale, index, offset, segment
- ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault,
- WriteALU.ReadAfterFold, // reg
- WriteALU.ReadAfterFold]>, // EFLAGS
- DefEFLAGS, UseEFLAGS {
- let mayStore = 1;
-}
-
-// BinOpMI - Instructions that read "[mem], imm".
-class BinOpMI<bits<8> o, string m, X86TypeInfo t, Format f, list<dag> p>
- : ITy<o, f, t, (outs), (ins t.MemOperand:$src1, t.ImmOperand:$src2), m,
- binop_args, p> {
- let ImmT = t.ImmEncoding;
- let mayLoad = 1;
-}
-// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only.
-class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
- Format f>
- : BinOpMI<o, m, t, f,
- [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>,
- Sched<[WriteALU.Folded]>, DefEFLAGS;
-// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS.
-class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
- : BinOpMI<o, m, t, f,
- [(store (node (t.VT (load addr:$src1)),
- t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>,
- Sched<[WriteALURMW]>, DefEFLAGS {
- let mayStore = 1;
-}
-// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and
-// read/write EFLAGS.
-class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
- : BinOpMI<o, m, t, f,
- [(store (node (t.VT (load addr:$src1)),
- t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>,
- Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
- let mayStore = 1;
-}
-
-// BinOpMI8 - Instructions that read "[mem], imm8".
-class BinOpMI8<string m, X86TypeInfo t, Format f>
- : ITy<0x83, f, t, (outs), (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m,
- binop_args, []> {
- let ImmT = Imm8;
- let mayLoad = 1;
-}
-// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only.
-class BinOpMI8_F<string m, X86TypeInfo t, Format f>
- : BinOpMI8<m, t, f>, Sched<[WriteALU.Folded]>, DefEFLAGS;
-// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS.
-class BinOpMI8_MF<string m, X86TypeInfo t, Format f>
- : BinOpMI8<m, t, f>, Sched<[WriteALURMW]>, DefEFLAGS {
- let mayStore = 1;
-}
-// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and
-// read/write EFLAGS.
-class BinOpMI8F_MF<string m, X86TypeInfo t, Format f>
- : BinOpMI8<m, t, f>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
- let mayStore = 1;
-}
-
-// BinOpAI - Instructions that read "a-reg imm" (Accumulator register).
-class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args>
- : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>,
- Sched<[WriteALU]> {
- let ImmT = t.ImmEncoding;
- let Uses = [areg];
-}
-// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only.
-class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args>
- : BinOpAI<o, m, t, areg, args>, DefEFLAGS;
-
-// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS.
-class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
- string args> : BinOpAI<o, m, t, areg, args> {
- let Defs = [areg, EFLAGS];
-}
-// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write
-// EFLAGS.
-class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
- string args> : BinOpAI<o, m, t, areg, args> {
- let Uses = [areg, EFLAGS];
- let Defs = [areg, EFLAGS];
- let SchedRW = [WriteADC];
+//===----------------------------------------------------------------------===//
+// MUL/IMUL and DIV/IDIV Instructions
+//
+class MulDivOpR<bits<8> o, Format f, string m, X86TypeInfo t,
+ X86FoldableSchedWrite sched, list<dag> p>
+ : UnaryOpR<o, f, m, "$src1", t, (outs), p> {
+ let SchedRW = [sched];
}
-// UnaryOpR - Instructions that read "reg" and write "reg".
-class UnaryOpR<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p>
- : ITy<o, f, t, (outs t.RegClass:$dst),
- (ins t.RegClass:$src1), m, "$dst", p>, Sched<[WriteALU]>;
-
-// UnaryOpM - Instructions that read "[mem]" and writes "[mem]".
-class UnaryOpM<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p>
- : ITy<o, f, t, (outs), (ins t.MemOperand:$dst), m, "$dst", p>,
- Sched<[WriteALURMW]> {
- let mayLoad = 1;
- let mayStore = 1;
+class MulDivOpM<bits<8> o, Format f, string m, X86TypeInfo t,
+ X86FoldableSchedWrite sched, list<dag> p>
+ : UnaryOpM<o, f, m, "$src1", t, (outs), p> {
+ let SchedRW =
+ [sched.Folded,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ sched.ReadAfterFold, sched.ReadAfterFold];
}
-// INCDECR - Instructions like "inc reg".
-class INCDECR<Format f, string m, X86TypeInfo t, SDPatternOperator node>
- : UnaryOpR<0xFF, f, m, t,
- [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, 1))]>,
- DefEFLAGS {
- let isConvertibleToThreeAddress = 1; // Can xform into LEA.
+multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOperator node> {
+ // AL is really implied by AX, but the registers in Defs must match the
+ // SDNode results (i8, i32).
+ //
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+ def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8,
+ [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>;
+ let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+ def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16;
+ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+ def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32;
+ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+ def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>;
+ let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+ def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8,
+ [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>;
+ let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+ def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16;
+ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+ def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32;
+ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+ def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>;
}
-// INCDECM - Instructions like "inc [mem]".
-class INCDECM<Format f, string m, X86TypeInfo t, int num>
- : UnaryOpM<0xFF, f, m, t,
- [(store (add (t.LoadNode addr:$dst), num), addr:$dst),
- (implicit EFLAGS)]>, DefEFLAGS;
-
-// INCDECR_ALT - Instructions like "inc reg" short forms.
-class INCDECR_ALT<bits<8> o, string m, X86TypeInfo t>
- : UnaryOpR<o, AddRegFrm, m, t, []>, DefEFLAGS {
- // Short forms only valid in 32-bit mode. Selected during MCInst lowering.
- let Predicates = [Not64BitMode];
+defm MUL : Mul<0xF7, "mul", MRM4r, MRM4m, mul>;
+defm IMUL : Mul<0xF7, "imul", MRM5r, MRM5m, null_frag>;
+
+multiclass Div<bits<8> o, string m, Format RegMRM, Format MemMRM> {
+ defvar sched8 = !if(!eq(m, "div"), WriteDiv8, WriteIDiv8);
+ defvar sched16 = !if(!eq(m, "div"), WriteDiv16, WriteIDiv16);
+ defvar sched32 = !if(!eq(m, "div"), WriteDiv32, WriteIDiv32);
+ defvar sched64 = !if(!eq(m, "div"), WriteDiv64, WriteIDiv64);
+ let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+ def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>;
+ let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+ def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16;
+ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+ def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32;
+ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+ def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>;
+ let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+ def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>;
+ let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+ def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16;
+ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+ def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32;
+ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+ def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>;
}
-
-// MulOpR - Instructions like "mul reg".
-class MulOpR<bits<8> o, Format f, string m, X86TypeInfo t,
- X86FoldableSchedWrite sched, list<dag> p>
- : ITy<o, f, t, (outs), (ins t.RegClass:$src), m, "$src", p>, Sched<[sched]>;
-
-// MulOpM - Instructions like "mul [mem]".
-class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t,
- X86FoldableSchedWrite sched, list<dag> p>
- : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m,
- "$src", p>, SchedLoadReg<sched> {
- let mayLoad = 1;
+let hasSideEffects = 1 in { // so that we don't speculatively execute
+defm DIV: Div<0xF7, "div", MRM6r, MRM6m>;
+defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>;
}
-// NegOpR - Instructions like "neg reg".
-class NegOpR<bits<8> o, string m, X86TypeInfo t>
- : UnaryOpR<o, MRM3r, m, t,
- [(set t.RegClass:$dst, (ineg t.RegClass:$src1)),
- (implicit EFLAGS)]>, DefEFLAGS;
-
-// NegOpM - Instructions like "neg [mem]".
-class NegOpM<bits<8> o, string m, X86TypeInfo t>
- : UnaryOpM<o, MRM3m, m, t,
- [(store (ineg (t.LoadNode addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, DefEFLAGS;
-
-// NOTE: NOT does not set EFLAGS!
-// NotOpR - Instructions like "not reg".
-class NotOpR<bits<8> o, string m, X86TypeInfo t>
- : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>;
-
-// NotOpM - Instructions like "neg [mem]".
-class NotOpM<bits<8> o, string m, X86TypeInfo t>
- : UnaryOpM<o, MRM2m, m, t,
- [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>;
-
-// IMulOpRR - Instructions like "imul reg, reg, i8".
-class IMulOpRR<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched>
- : BinOpRR_RF<o, m, t, X86smul_flag>, TB {
+class IMulOpRR<X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpRR_RF<0xAF, "imul", t, X86smul_flag>, TB {
let Form = MRMSrcReg;
let SchedRW = [sched];
// X = IMUL Y, Z --> X = IMUL Z, Y
let isCommutable = 1;
}
-
-// IMulOpRM - Instructions like "imul reg, reg, [mem]".
-class IMulOpRM<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched>
- : BinOpRM_RF<o, m, t, X86smul_flag>, TB {
+class IMulOpRM<X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpRM_RF<0xAF, "imul", t, X86smul_flag>, TB {
let Form = MRMSrcMem;
let SchedRW = [sched.Folded, sched.ReadAfterFold];
}
-// IMulOpRRI8 - Instructions like "imul reg, reg, i8".
-class IMulOpRRI8<bits<8> o, string m, X86TypeInfo t,
- X86FoldableSchedWrite sched>
- : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst),
- (ins t.RegClass:$src1, t.Imm8Operand:$src2), m,
- "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]>, DefEFLAGS {
- let ImmT = Imm8;
-}
+def IMUL16rr : IMulOpRR<Xi16, WriteIMul16Reg>, OpSize16;
+def IMUL32rr : IMulOpRR<Xi32, WriteIMul32Reg>, OpSize32;
+def IMUL64rr : IMulOpRR<Xi64, WriteIMul64Reg>;
+def IMUL16rm : IMulOpRM<Xi16, WriteIMul16Reg>, OpSize16;
+def IMUL32rm : IMulOpRM<Xi32, WriteIMul32Reg>, OpSize32;
+def IMUL64rm : IMulOpRM<Xi64, WriteIMul64Reg>;
-// IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64".
-class IMulOpRRI<bits<8> o, string m, X86TypeInfo t,
- X86FoldableSchedWrite sched>
- : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst),
- (ins t.RegClass:$src1, t.ImmOperand:$src2), m,
- "{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1,
- t.ImmNoSuOperator:$src2))]>,
- Sched<[sched]>, DefEFLAGS {
- let ImmT = t.ImmEncoding;
+class IMulOpRI8_R<X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpRI8<0x6B, "imul", binop_ndd_args, t, MRMSrcReg,
+ (outs t.RegClass:$dst)>, DefEFLAGS {
+ let SchedRW = [sched];
}
-
-// IMulOpRMI8 - Instructions like "imul reg, [mem], i8".
-class IMulOpRMI8<bits<8> o, string m, X86TypeInfo t,
- X86FoldableSchedWrite sched>
- : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
- (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m,
- "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]>,
+class IMulOpRI_R<X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg,
+ (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1,
+ t.ImmNoSuOperator:$src2))]>, DefEFLAGS {
+ let SchedRW = [sched];
+}
+class IMulOpMI8_R<X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)>,
DefEFLAGS {
- let ImmT = Imm8;
- let mayLoad = 1;
+ let Opcode = 0x6B;
+ let SchedRW = [sched.Folded];
}
-
-// IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64".
-class IMulOpRMI<bits<8> o, string m, X86TypeInfo t,
- X86FoldableSchedWrite sched>
- : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
- (ins t.MemOperand:$src1, t.ImmOperand:$src2), m,
- "{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set t.RegClass:$dst, EFLAGS,
- (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>,
- Sched<[sched.Folded]>, DefEFLAGS {
- let ImmT = t.ImmEncoding;
+class IMulOpMI_R<X86TypeInfo t, X86FoldableSchedWrite sched>
+ : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem,
+ (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (X86smul_flag (t.LoadNode addr:$src1),
+ t.ImmNoSuOperator:$src2))]>,
+ DefEFLAGS {
+ let SchedRW = [sched.Folded];
}
+def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, OpSize16;
+def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, OpSize32;
+def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>;
+def IMUL16rri : IMulOpRI_R<Xi16, WriteIMul16Imm>, OpSize16;
+def IMUL32rri : IMulOpRI_R<Xi32, WriteIMul32Imm>, OpSize32;
+def IMUL64rri32 : IMulOpRI_R<Xi64, WriteIMul64Imm>;
+
+def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, OpSize16;
+def IMUL32rmi8 : IMulOpMI8_R<Xi32, WriteIMul32Imm>, OpSize32;
+def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>;
+def IMUL16rmi : IMulOpMI_R<Xi16, WriteIMul16Imm>, OpSize16;
+def IMUL32rmi : IMulOpMI_R<Xi32, WriteIMul32Imm>, OpSize32;
+def IMUL64rmi32 : IMulOpMI_R<Xi64, WriteIMul64Imm>;
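
(The IMUL register and immediate forms above attach an EFLAGS result via X86smul_flag, which is what lets overflow-checked multiplies consume the instruction's overflow flag. A hedged C++ sketch of source that commonly maps onto this pattern; actual codegen is compiler- and option-dependent:)

  #include <cstdio>

  int main() {
    int a = 123456, b = 789012, r;
    // __builtin_mul_overflow (GCC/Clang) performs a signed multiply and
    // reports overflow; on x86 this is typically imul followed by jo/seto.
    if (__builtin_mul_overflow(a, b, &r))
      std::puts("overflow");
    else
      std::printf("%d\n", r);
  }
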
-let Constraints = "$src1 = $dst" in {
-def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>, OpSize16;
-def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>, OpSize32;
-def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
-def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>, OpSize16;
-def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>, OpSize32;
-def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
-
-def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>, OpSize16;
-def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>, OpSize32;
-def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>;
-def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>, OpSize16;
-def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>, OpSize32;
-def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>;
+//===----------------------------------------------------------------------===//
+// INC and DEC Instructions
+//
+class IncOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag> {
+ let Pattern = [(set t.RegClass:$dst, EFLAGS,
+ (X86add_flag_nocf t.RegClass:$src1, 1))];
+}
+class DecOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag> {
+ let Pattern = [(set t.RegClass:$dst, EFLAGS,
+ (X86sub_flag_nocf t.RegClass:$src1, 1))];
+}
+class IncOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> {
+ let Pattern = [(store (add (t.LoadNode addr:$src1), 1), addr:$src1),
+ (implicit EFLAGS)];
+}
+class DecOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> {
+ let Pattern = [(store (add (t.LoadNode addr:$src1), -1), addr:$src1),
+ (implicit EFLAGS)];
+}
+// IncDec_Alt - Instructions like "inc reg" short forms.
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+class IncDec_Alt<bits<8> o, string m, X86TypeInfo t>
+ : UnaryOpR_RF<o, AddRegFrm, m, t, null_frag>, Requires<[Not64BitMode]>;
+
+let isConvertibleToThreeAddress = 1 in {
+def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16;
+def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32;
+def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16;
+def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32;
+def INC8r : IncOpR_RF<Xi8>;
+def INC16r : IncOpR_RF<Xi16>, OpSize16;
+def INC32r : IncOpR_RF<Xi32>, OpSize32;
+def INC64r : IncOpR_RF<Xi64>;
+def DEC8r : DecOpR_RF<Xi8>;
+def DEC16r : DecOpR_RF<Xi16>, OpSize16;
+def DEC32r : DecOpR_RF<Xi32>, OpSize32;
+def DEC64r : DecOpR_RF<Xi64>;
}
-
let Predicates = [UseIncDec] in {
-def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>;
-def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16;
-def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>, OpSize32;
-def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>;
-def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>, OpSize16;
-def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>, OpSize32;
+def INC8m : IncOpM_M<Xi8>;
+def INC16m : IncOpM_M<Xi16>, OpSize16;
+def INC32m : IncOpM_M<Xi32>, OpSize32;
+def DEC8m : DecOpM_M<Xi8>;
+def DEC16m : DecOpM_M<Xi16>, OpSize16;
+def DEC32m : DecOpM_M<Xi32>, OpSize32;
}
let Predicates = [UseIncDec, In64BitMode] in {
-def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>;
-def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>;
+def INC64m : IncOpM_M<Xi64>;
+def DEC64m : DecOpM_M<Xi64>;
}
-// Extra precision multiplication
-
-// AL is really implied by AX, but the registers in Defs must match the
-// SDNode results (i8, i32).
-// AL,AH = AL*GR8
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8,
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]>;
-// AX,DX = AX*GR16
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>, OpSize16;
-// EAX,EDX = EAX*GR32
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32r : MulOpR<0xF7, MRM4r, "mul", Xi32, WriteIMul32,
- [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>, OpSize32;
-// RAX,RDX = RAX*GR64
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def MUL64r : MulOpR<0xF7, MRM4r, "mul", Xi64, WriteIMul64,
- [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
-// AL,AH = AL*[mem8]
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m : MulOpM<0xF6, MRM4m, "mul", Xi8, WriteIMul8,
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>;
-// AX,DX = AX*[mem16]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>, OpSize16;
-// EAX,EDX = EAX*[mem32]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>, OpSize32;
-// RAX,RDX = RAX*[mem64]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def MUL64m : MulOpM<0xF7, MRM4m, "mul", Xi64, WriteIMul64, []>,
- Requires<[In64BitMode]>;
-
-// AL,AH = AL*GR8
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : MulOpR<0xF6, MRM5r, "imul", Xi8, WriteIMul8, []>;
-// AX,DX = AX*GR16
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>, OpSize16;
-// EAX,EDX = EAX*GR32
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>, OpSize32;
-// RAX,RDX = RAX*GR64
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def IMUL64r : MulOpR<0xF7, MRM5r, "imul", Xi64, WriteIMul64, []>;
-
-// AL,AH = AL*[mem8]
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m : MulOpM<0xF6, MRM5m, "imul", Xi8, WriteIMul8, []>;
-// AX,DX = AX*[mem16]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>, OpSize16;
-// EAX,EDX = EAX*[mem32]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>, OpSize32;
-// RAX,RDX = RAX*[mem64]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def IMUL64m : MulOpM<0xF7, MRM5m, "imul", Xi64, WriteIMul64, []>,
- Requires<[In64BitMode]>;
-
-let Constraints = "$src1 = $dst" in {
-// Register-Register Signed Integer Multiply
-def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16;
-def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32;
-def IMUL64rr : IMulOpRR<0xAF, "imul", Xi64, WriteIMul64Reg>;
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16;
-def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32;
-def IMUL64rm : IMulOpRM<0xAF, "imul", Xi64, WriteIMul64Reg>;
+//===----------------------------------------------------------------------===//
+// NEG and NOT Instructions
+//
+class NegOpR_R<X86TypeInfo t, bit ndd = 0>
+ : UnaryOpR_R<0xF7, MRM3r, "neg", t, ineg, ndd>;
+class NegOpR_RF<X86TypeInfo t, bit ndd = 0>
+ : UnaryOpR_RF<0xF7, MRM3r, "neg", t, ineg, ndd>;
+class NegOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM3m, "neg", t, null_frag>;
+class NegOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xF7, MRM3m, "neg", t, ineg>;
+class NegOpM_R<X86TypeInfo t> : UnaryOpM_R<0xF7, MRM3m, "neg", t, null_frag>;
+class NegOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xF7, MRM3m, "neg", t, ineg>;
+
+class NotOpR_R<X86TypeInfo t, bit ndd = 0>
+ : UnaryOpR_R<0xF7, MRM2r, "not", t, not, ndd>;
+class NotOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM2m, "not", t, not>;
+class NotOpM_R<X86TypeInfo t> : UnaryOpM_R<0xF7, MRM2m, "not", t, not>;
+
+let Predicates = [NoNDD] in {
+def NEG8r : NegOpR_RF<Xi8>;
+def NEG16r : NegOpR_RF<Xi16>, OpSize16;
+def NEG32r : NegOpR_RF<Xi32>, OpSize32;
+def NEG64r : NegOpR_RF<Xi64>;
+def NOT8r : NotOpR_R<Xi8>;
+def NOT16r : NotOpR_R<Xi16>, OpSize16;
+def NOT32r : NotOpR_R<Xi32>, OpSize32;
+def NOT64r : NotOpR_R<Xi64>;
}
-// Surprisingly enough, these are not two address instructions!
-// NOTE: These are order specific, we want the ri8 forms to be listed
-// first so that they are slightly preferred to the ri forms.
-
-// Register-Integer Signed Integer Multiply
-// GR16 = GR16*I8
-def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16;
-// GR16 = GR16*I16
-def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16;
-// GR32 = GR32*I8
-def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32;
-// GR32 = GR32*I32
-def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32;
-// GR64 = GR64*I8
-def IMUL64rri8 : IMulOpRRI8<0x6B, "imul", Xi64, WriteIMul64Imm>;
-// GR64 = GR64*I32
-def IMUL64rri32 : IMulOpRRI<0x69, "imul", Xi64, WriteIMul64Imm>;
-
-// Memory-Integer Signed Integer Multiply
-// GR16 = [mem16]*I8
-def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16;
-// GR16 = [mem16]*I16
-def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16;
-// GR32 = [mem32]*I8
-def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32;
-// GR32 = [mem32]*I32
-def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32;
-// GR64 = [mem64]*I8
-def IMUL64rmi8 : IMulOpRMI8<0x6B, "imul", Xi64, WriteIMul64Imm>;
-// GR64 = [mem64]*I32
-def IMUL64rmi32 : IMulOpRMI<0x69, "imul", Xi64, WriteIMul64Imm>;
-
-// unsigned division/remainder
-let hasSideEffects = 1 in { // so that we don't speculatively execute
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-// AX/r8 = AL,AH
-def DIV8r : MulOpR<0xF6, MRM6r, "div", Xi8, WriteDiv8, []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-// DX:AX/r16 = AX,DX
-def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>, OpSize16;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-// EDX:EAX/r32 = EAX,EDX
-def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>, OpSize32;
-// RDX:RAX/r64 = RAX,RDX
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-def DIV64r : MulOpR<0xF7, MRM6r, "div", Xi64, WriteDiv64, []>;
-
-let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-// AX/[mem8] = AL,AH
-def DIV8m : MulOpM<0xF6, MRM6m, "div", Xi8, WriteDiv8, []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-// DX:AX/[mem16] = AX,DX
-def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>, OpSize16;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>, OpSize32;
-// RDX:RAX/[mem64] = RAX,RDX
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-def DIV64m : MulOpM<0xF7, MRM6m, "div", Xi64, WriteDiv64, []>,
- Requires<[In64BitMode]>;
+let Predicates = [HasNDD, In64BitMode] in {
+def NEG8r_ND : NegOpR_RF<Xi8, 1>;
+def NEG16r_ND : NegOpR_RF<Xi16, 1>, PD;
+def NEG32r_ND : NegOpR_RF<Xi32, 1>;
+def NEG64r_ND : NegOpR_RF<Xi64, 1>;
+
+def NOT8r_ND : NotOpR_R<Xi8, 1>;
+def NOT16r_ND : NotOpR_R<Xi16, 1>, PD;
+def NOT32r_ND : NotOpR_R<Xi32, 1>;
+def NOT64r_ND : NotOpR_R<Xi64, 1>;
+
+def NEG8r_NF_ND : NegOpR_R<Xi8, 1>, EVEX_NF;
+def NEG16r_NF_ND : NegOpR_R<Xi16, 1>, EVEX_NF, PD;
+def NEG32r_NF_ND : NegOpR_R<Xi32, 1>, EVEX_NF;
+def NEG64r_NF_ND : NegOpR_R<Xi64, 1>, EVEX_NF;
}
-// Signed division/remainder.
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-// AX/r8 = AL,AH
-def IDIV8r : MulOpR<0xF6, MRM7r, "idiv", Xi8, WriteIDiv8, []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-// DX:AX/r16 = AX,DX
-def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>, OpSize16;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-// EDX:EAX/r32 = EAX,EDX
-def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>, OpSize32;
-// RDX:RAX/r64 = RAX,RDX
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-def IDIV64r: MulOpR<0xF7, MRM7r, "idiv", Xi64, WriteIDiv64, []>;
-
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-// AX/[mem8] = AL,AH
-def IDIV8m : MulOpM<0xF6, MRM7m, "idiv", Xi8, WriteIDiv8, []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-// DX:AX/[mem16] = AX,DX
-def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>, OpSize16;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-// EDX:EAX/[mem32] = EAX,EDX
-def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>, OpSize32;
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
-// RDX:RAX/[mem64] = RAX,RDX
-def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>,
- Requires<[In64BitMode]>;
-} // hasSideEffects = 1
-
-let Constraints = "$src1 = $dst" in {
-def NEG8r : NegOpR<0xF6, "neg", Xi8>;
-def NEG16r : NegOpR<0xF7, "neg", Xi16>, OpSize16;
-def NEG32r : NegOpR<0xF7, "neg", Xi32>, OpSize32;
-def NEG64r : NegOpR<0xF7, "neg", Xi64>;
+def NEG8m : NegOpM_MF<Xi8>;
+def NEG16m : NegOpM_MF<Xi16>, OpSize16;
+def NEG32m : NegOpM_MF<Xi32>, OpSize32;
+def NEG64m : NegOpM_MF<Xi64>, Requires<[In64BitMode]>;
+
+let Predicates = [HasNDD, In64BitMode] in {
+def NEG8m_ND : NegOpM_RF<Xi8>;
+def NEG16m_ND : NegOpM_RF<Xi16>, PD;
+def NEG32m_ND : NegOpM_RF<Xi32>;
+def NEG64m_ND : NegOpM_RF<Xi64>;
+
+def NEG8m_NF_ND : NegOpM_R<Xi8>, EVEX_NF;
+def NEG16m_NF_ND : NegOpM_R<Xi16>, EVEX_NF, PD;
+def NEG32m_NF_ND : NegOpM_R<Xi32>, EVEX_NF;
+def NEG64m_NF_ND : NegOpM_R<Xi64>, EVEX_NF;
}
-def NEG8m : NegOpM<0xF6, "neg", Xi8>;
-def NEG16m : NegOpM<0xF7, "neg", Xi16>, OpSize16;
-def NEG32m : NegOpM<0xF7, "neg", Xi32>, OpSize32;
-def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>;
+def NOT8m : NotOpM_M<Xi8>;
+def NOT16m : NotOpM_M<Xi16>, OpSize16;
+def NOT32m : NotOpM_M<Xi32>, OpSize32;
+def NOT64m : NotOpM_M<Xi64>, Requires<[In64BitMode]>;
-let Constraints = "$src1 = $dst" in {
-def NOT8r : NotOpR<0xF6, "not", Xi8>;
-def NOT16r : NotOpR<0xF7, "not", Xi16>, OpSize16;
-def NOT32r : NotOpR<0xF7, "not", Xi32>, OpSize32;
-def NOT64r : NotOpR<0xF7, "not", Xi64>;
+let Predicates = [HasNDD, In64BitMode] in {
+def NOT8m_ND : NotOpM_R<Xi8>;
+def NOT16m_ND : NotOpM_R<Xi16>, PD;
+def NOT32m_ND : NotOpM_R<Xi32>;
+def NOT64m_ND : NotOpM_R<Xi64>;
}
-def NOT8m : NotOpM<0xF6, "not", Xi8>;
-def NOT16m : NotOpM<0xF7, "not", Xi16>, OpSize16;
-def NOT32m : NotOpM<0xF7, "not", Xi32>, OpSize32;
-def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>;
+let Predicates = [In64BitMode], Pattern = [(null_frag)] in {
+def NEG8r_NF : NegOpR_R<Xi8>, NF;
+def NEG16r_NF : NegOpR_R<Xi16>, NF, PD;
+def NEG32r_NF : NegOpR_R<Xi32>, NF;
+def NEG64r_NF : NegOpR_R<Xi64>, NF;
+def NEG8m_NF : NegOpM_M<Xi8>, NF;
+def NEG16m_NF : NegOpM_M<Xi16>, NF, PD;
+def NEG32m_NF : NegOpM_M<Xi32>, NF;
+def NEG64m_NF : NegOpM_M<Xi64>, NF;
+
+def NEG8r_EVEX : NegOpR_RF<Xi8>, PL;
+def NEG16r_EVEX : NegOpR_RF<Xi16>, PL, PD;
+def NEG32r_EVEX : NegOpR_RF<Xi32>, PL;
+def NEG64r_EVEX : NegOpR_RF<Xi64>, PL;
+
+def NOT8r_EVEX : NotOpR_R<Xi8>, PL;
+def NOT16r_EVEX : NotOpR_R<Xi16>, PL, PD;
+def NOT32r_EVEX : NotOpR_R<Xi32>, PL;
+def NOT64r_EVEX : NotOpR_R<Xi64>, PL;
+
+def NEG8m_EVEX : NegOpM_MF<Xi8>, PL;
+def NEG16m_EVEX : NegOpM_MF<Xi16>, PL, PD;
+def NEG32m_EVEX : NegOpM_MF<Xi32>, PL;
+def NEG64m_EVEX : NegOpM_MF<Xi64>, PL;
+
+def NOT8m_EVEX : NotOpM_M<Xi8>, PL;
+def NOT16m_EVEX : NotOpM_M<Xi16>, PL, PD;
+def NOT32m_EVEX : NotOpM_M<Xi32>, PL;
+def NOT64m_EVEX : NotOpM_M<Xi64>, PL;
+}
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
/// defined with "(set GPR:$dst, EFLAGS, (...".
@@ -640,61 +347,204 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
SDNode opnodeflag, SDNode opnode,
bit CommutableRR, bit ConvertibleToThreeAddress,
bit ConvertibleToThreeAddressRR> {
- let Constraints = "$src1 = $dst" in {
- let isCommutable = CommutableRR,
- isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
- def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16;
- def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32;
- def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
+ let Predicates = [NoNDD] in {
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>;
+ def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD;
+ def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>;
+ def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>;
+ def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF;
+ def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD;
+ def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF;
+ def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF;
+ def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD;
+ def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF;
+ def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF;
+ def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
}
+ }
- def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
- def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
- def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
+ def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
+ def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+ let Predicates = [In64BitMode] in {
+ def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL;
+ def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD;
+ def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL;
+ def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL;
+ def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>;
+ def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD;
+ def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>;
+ def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>;
+ def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF;
+ def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD;
+ def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF;
+ def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF;
+ def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF;
+ def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD;
+ def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF;
+ def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF;
+ }
- def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
- def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16;
- def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32;
- def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ let Predicates = [NoNDD] in {
+ def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16;
+ def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32;
+ def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>;
+ def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD;
+ def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>;
+ def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>;
+ def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF;
+ def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD;
+ def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF;
+ def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF;
+ def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD;
+ def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF;
+ def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF;
+ def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL;
+ def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD;
+ def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL;
+ def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL;
+ }
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
- // NOTE: These are order specific, we want the ri8 forms to be listed
- // first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
- def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
- def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>;
-
- def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16;
- def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32;
- def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>;
+ let Predicates = [NoNDD] in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>;
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16;
+ def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32;
+ def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD;
+ def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>;
+ def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>;
+ def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>;
+ def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD;
+ def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>;
+ def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>;
+ def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD;
+ def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF;
+ def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF;
+ def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF;
+ def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD;
+ def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF;
+ def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD;
+ def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF;
+ def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF;
+ def NAME#8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF;
+ def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD;
+ def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF;
+ def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF;
+ def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD;
+ def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL;
+ def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL;
+ def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL;
+ def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD;
+ def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL;
+ def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL;
+ }
}
- } // Constraints = "$src1 = $dst"
- def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>;
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD;
+ def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF;
+ def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD;
+ def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF;
+ def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF;
+ def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD;
+ def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF;
+ def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF;
+ def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ }
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16;
def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>;
-
+ def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>;
def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD;
+ def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>;
+ def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>;
+ def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD;
+ def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD;
+ def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF;
+ def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF;
+ def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF;
+ def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD;
+ def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF;
+ def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD;
+ def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF;
+ def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF;
+ def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF;
+ def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD;
+ def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF;
+ def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF;
+ def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD;
+ def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL;
+ def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL;
+ def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL;
+ def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD;
+ def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL;
+ def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL;
+ }
// These are for the disassembler since 0x82 opcode behaves like 0x80, but
// not in 64-bit mode.
let Predicates = [Not64BitMode] in {
- let Constraints = "$src1 = $dst" in
def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
}
@@ -719,62 +569,153 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
string mnemonic, Format RegMRM, Format MemMRM,
SDNode opnode, bit CommutableRR,
bit ConvertibleToThreeAddress> {
- let Constraints = "$src1 = $dst" in {
- let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ let isCommutable = CommutableRR in {
+ let Predicates = [NoNDD] in {
+ def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
- } // isConvertibleToThreeAddress
+ def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ }
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD;
+ def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>;
+ def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>;
+ }
+ }
} // isCommutable
+ let Predicates = [In64BitMode] in {
+ def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ }
+
def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>;
def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16;
def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32;
def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>;
+ let Predicates = [In64BitMode] in {
+ def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>;
+ def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD;
+ def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>;
+ def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>;
+ def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL;
+ def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD;
+ def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL;
+ def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL;
+ }
+
+ let Predicates = [NoNDD] in {
+ def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
+ def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
+ def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>;
+ }
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>;
+ def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD;
+ def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>;
+ def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL;
+ def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD;
+ def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL;
+ def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL;
+ }
- def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>;
- def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16;
- def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32;
- def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>;
+ let Predicates = [NoNDD] in {
+ def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order specific, we want the ri8 forms to be listed
+ // first so that they are slightly preferred to the ri forms.
+ def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
+ def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
+ def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>;
+
+ def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
+ def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
+ def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ }
- def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- // NOTE: These are order specific, we want the ri8 forms to be listed
- // first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16;
- def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32;
- def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>;
-
- def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16;
- def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32;
- def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD;
+ def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>;
+ def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>;
+ def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD;
+ def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>;
+ def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>;
}
- } // Constraints = "$src1 = $dst"
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL;
+ def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD;
+ def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL;
+ def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL;
+ def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD;
+ def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL;
+ def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL;
+ }
def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>;
def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16;
def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32;
def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>;
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD;
+ def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL;
+ def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD;
+ def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL;
+ def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL;
+ }
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
+ def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16;
def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>;
-
- def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>;
def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16;
def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32;
let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+
+ let Predicates = [HasNDD, In64BitMode] in {
+ def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD;
+ def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>;
+ def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>;
+ def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD;
+ def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>;
+ }
+ let Predicates = [In64BitMode] in {
+ def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL;
+ def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD;
+ def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL;
+ def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL;
+ def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD;
+ def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL;
+ def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL;
+ }
// These are for the disassembler since 0x82 opcode behaves like 0x80, but
// not in 64-bit mode.
let Predicates = [Not64BitMode] in {
- let Constraints = "$src1 = $dst" in
def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly;
def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly;
}
@@ -1089,36 +1030,30 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
//===----------------------------------------------------------------------===//
// ANDN Instruction
//
-multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
- PatFrag ld_frag, X86FoldableSchedWrite sched> {
-let Predicates = [HasBMI, NoEGPR] in {
- def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
- VEX, VVVV, Sched<[sched]>;
- def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- VEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
-}
-let Predicates = [HasBMI, HasEGPR, In64BitMode] in {
- def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>,
- EVEX, VVVV, Sched<[sched]>;
- def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
-}
+multiclass AndN<X86TypeInfo t, string suffix> {
+ defvar andn_rr_p =
+ [(set t.RegClass:$dst, EFLAGS, (X86and_flag (not t.RegClass:$src1),
+ t.RegClass:$src2))];
+ defvar andn_rm_p =
+ [(set t.RegClass:$dst, EFLAGS, (X86and_flag (not t.RegClass:$src1),
+ (t.LoadNode addr:$src2)))];
+ def rr#suffix : ITy<0xF2, MRMSrcReg, t, (outs t.RegClass:$dst),
+ (ins t.RegClass:$src1, t.RegClass:$src2), "andn",
+ binop_ndd_args, andn_rr_p>, VVVV, Sched<[WriteALU]>,
+ T8, DefEFLAGS;
+ def rm#suffix : ITy<0xF2, MRMSrcMem, t, (outs t.RegClass:$dst),
+ (ins t.RegClass:$src1, t.MemOperand:$src2), "andn",
+ binop_ndd_args, andn_rm_p>, VVVV,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>,
+ T8, DefEFLAGS;
}
// Complexity is reduced to give and with immediate a chance to match first.
-let Defs = [EFLAGS], AddedComplexity = -6 in {
- defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8;
- defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8, REX_W;
+let AddedComplexity = -6 in {
+defm ANDN32 : AndN<Xi32, "">, VEX, Requires<[HasBMI, NoEGPR]>;
+defm ANDN64 : AndN<Xi64, "">, VEX, REX_W, Requires<[HasBMI, NoEGPR]>;
+defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]>;
+defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
let Predicates = [HasBMI], AddedComplexity = -6 in {
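
(The new AndN multiclass keeps the same semantics as the old bmi_andn definitions: dst = ~src1 & src2 with EFLAGS defined. A small usage sketch via the BMI intrinsic, assuming a BMI-capable target and -mbmi; illustrative, not part of the patch:)

  #include <cstdint>
  #include <cstdio>
  #include <immintrin.h>

  int main() {
    uint64_t mask = 0x00000000FFFFFFFFull;
    uint64_t v    = 0x123456789ABCDEF0ull;
    // _andn_u64(a, b) computes ~a & b, the operation ANDN64rr implements.
    uint64_t hi_half = _andn_u64(mask, v);
    std::printf("%016llx\n", (unsigned long long)hi_half);
  }
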
@@ -1135,78 +1070,63 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
//===----------------------------------------------------------------------===//
// MULX Instruction
//
-multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
- X86FoldableSchedWrite sched> {
-let hasSideEffects = 0 in {
-let Predicates = [HasBMI2, NoEGPR] in {
- def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
- !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8, XD, VEX, VVVV, Sched<[WriteIMulH, sched]>;
-
+multiclass MulX<X86TypeInfo t, X86FoldableSchedWrite sched> {
+ defvar mulx_args = "{$src, $dst2, $dst1|$dst1, $dst2, $src}";
+ defvar mulx_rm_sched =
+ [WriteIMulHLd, sched.Folded,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Implicit read of EDX/RDX
+ sched.ReadAfterFold];
+
+ def rr : ITy<0xF6, MRMSrcReg, t, (outs t.RegClass:$dst1, t.RegClass:$dst2),
+ (ins t.RegClass:$src), "mulx", mulx_args, []>, T8, XD, VEX,
+ VVVV, Sched<[WriteIMulH, sched]>;
let mayLoad = 1 in
- def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
- !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8, XD, VEX, VVVV,
- Sched<[WriteIMulHLd, sched.Folded,
- // Memory operand.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // Implicit read of EDX/RDX
- sched.ReadAfterFold]>;
-
+ def rm : ITy<0xF6, MRMSrcMem, t, (outs t.RegClass:$dst1, t.RegClass:$dst2),
+ (ins t.MemOperand:$src), "mulx", mulx_args, []>, T8, XD, VEX,
+ VVVV, Sched<mulx_rm_sched>;
+
+ let Predicates = [In64BitMode] in {
+ def rr_EVEX : ITy<0xF6, MRMSrcReg, t,
+ (outs t.RegClass:$dst1, t.RegClass:$dst2),
+ (ins t.RegClass:$src), "mulx", mulx_args, []>, T8, XD,
+ EVEX, VVVV, Sched<[WriteIMulH, sched]>;
+ let mayLoad = 1 in
+ def rm_EVEX : ITy<0xF6, MRMSrcMem, t,
+ (outs t.RegClass:$dst1, t.RegClass:$dst2),
+ (ins t.MemOperand:$src), "mulx", mulx_args, []>, T8, XD,
+ EVEX, VVVV, Sched<mulx_rm_sched>;
+ }
// Pseudo instructions to be used when the low result isn't used. The
// instruction is defined to keep the high if both destinations are the same.
- def Hrr : PseudoI<(outs RC:$dst), (ins RC:$src),
- []>, Sched<[sched]>;
-
+ def Hrr : PseudoI<(outs t.RegClass:$dst), (ins t.RegClass:$src), []>,
+ Sched<[sched]>;
let mayLoad = 1 in
- def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
- []>, Sched<[sched.Folded]>;
-}
-let Predicates = [HasBMI2, HasEGPR, In64BitMode] in
- def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
- !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8, XD, EVEX, VVVV, Sched<[WriteIMulH, sched]>;
-let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in
- def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
- !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8, XD, EVEX, VVVV,
- Sched<[WriteIMulHLd, sched.Folded,
- // Memory operand.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // Implicit read of EDX/RDX
- sched.ReadAfterFold]>;
-}
+ def Hrm : PseudoI<(outs t.RegClass:$dst), (ins t.MemOperand:$src), []>,
+ Sched<[sched.Folded]>;
}
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
+defm MULX32 : MulX<Xi32, WriteMULX32>;
+
let Uses = [RDX] in
- defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
+defm MULX64 : MulX<Xi64, WriteMULX64>, REX_W;
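
(MULX computes the full double-width unsigned product, implicitly reading EDX/RDX as modelled by the Uses lists above, and leaves EFLAGS untouched. A usage sketch via the BMI2 intrinsic, assuming -mbmi2; illustrative only:)

  #include <cstdint>
  #include <cstdio>
  #include <immintrin.h>

  int main() {
    uint64_t hi;
    // _mulx_u64 returns the low 64 bits and stores the high 64 bits; it maps
    // to the MULX64 forms defined above and does not modify EFLAGS.
    uint64_t lo = _mulx_u64(0xFFFFFFFFFFFFFFFFull, 2, &hi);
    std::printf("hi=%llu lo=%llu\n", (unsigned long long)hi,
                (unsigned long long)lo);
  }
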
//===----------------------------------------------------------------------===//
// ADCX and ADOX Instructions
//
// We don't have patterns for these as there is no advantage over ADC for
// most code.
-class ADCOXOpRR <string m, X86TypeInfo t>
- : BinOpRRF_RF<0xF6, m, t, null_frag> {
- let Form = MRMSrcReg;
- let isCommutable = 1;
+let Form = MRMSrcReg in {
+def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD;
+def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD;
+def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS;
+def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS;
}
-
-class ADCOXOpRM <string m, X86TypeInfo t>
- : BinOpRMF_RF<0xF6, m, t, null_frag> {
- let Form = MRMSrcMem;
-}
-
-let OpSize = OpSizeFixed, Constraints = "$src1 = $dst",
- Predicates = [HasADX] in {
-def ADCX32rr : ADCOXOpRR<"adcx", Xi32>, T8, PD;
-def ADCX64rr : ADCOXOpRR<"adcx", Xi64>, T8, PD;
-def ADOX32rr : ADCOXOpRR<"adox", Xi32>, T8, XS;
-def ADOX64rr : ADCOXOpRR<"adox", Xi64>, T8, XS;
-def ADCX32rm : ADCOXOpRM<"adcx", Xi32>, T8, PD;
-def ADCX64rm : ADCOXOpRM<"adcx", Xi64>, T8, PD;
-def ADOX32rm : ADCOXOpRM<"adox", Xi32>, T8, XS;
-def ADOX64rm : ADCOXOpRM<"adox", Xi64>, T8, XS;
+let Form = MRMSrcMem in {
+def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD;
+def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD;
+def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS;
+def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS;
}
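
(As the comment above notes, ADCX/ADOX carry no ISel patterns; their main use is keeping two independent carry chains alive, CF for ADCX and OF for ADOX, in wide-integer code. A hedged usage sketch via the ADX intrinsic, assuming -madx; illustrative only:)

  #include <cstdio>
  #include <immintrin.h>

  int main() {
    // Add two 128-bit values held as {low, high} 64-bit limbs.
    // _addcarryx_u64 is the intrinsic spelling of ADX-style add-with-carry.
    unsigned long long a_lo = ~0ULL, a_hi = 1, b_lo = 1, b_hi = 0, r_lo, r_hi;
    unsigned char c = _addcarryx_u64(0, a_lo, b_lo, &r_lo);
    c = _addcarryx_u64(c, a_hi, b_hi, &r_hi);
    std::printf("hi=%llu lo=%llu carry=%u\n", r_hi, r_lo, c);
  }
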
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
index 07e5576960d6..6e76b44b66a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
@@ -256,6 +256,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field?
bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field?
bit hasEVEX_B = 0; // Does this inst set the EVEX_B field?
+ bit hasEVEX_NF = 0; // Does this inst set the EVEX_NF field?
bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width.
// Declare it int rather than bits<4> so that all bits are defined when
// assigning to bits<7>.
@@ -309,4 +310,5 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{48} = hasEVEX_RC;
let TSFlags{49} = hasNoTrackPrefix;
let TSFlags{51-50} = explicitOpPrefixBits;
+ let TSFlags{52} = hasEVEX_NF;
}
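
(The new hasEVEX_NF bit lands in TSFlags{52}, next to the existing EVEX and prefix bits. Purely as a generic illustration of packing and testing a 1-bit field at position 52 of a 64-bit flags word; the constant and helper names below are made up for the example and are not LLVM's:)

  #include <cstdint>
  #include <cstdio>

  // Hypothetical names; the real accessors live in the X86 backend.
  constexpr uint64_t EVEX_NF_SHIFT = 52;
  constexpr uint64_t EVEX_NF_MASK  = 1ULL << EVEX_NF_SHIFT;

  constexpr bool hasEVEXNF(uint64_t tsFlags) {
    return (tsFlags & EVEX_NF_MASK) != 0;
  }

  int main() {
    uint64_t flags = EVEX_NF_MASK;   // an instruction with hasEVEX_NF = 1
    std::printf("%d %d\n", hasEVEXNF(flags), hasEVEXNF(0));
  }
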
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td
index 8653f15d8602..94fa6e45ded9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -10,6 +10,8 @@ def TruePredicate : Predicate<"true">;
def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
+def HasNDD : Predicate<"Subtarget->hasNDD()">;
+def NoNDD : Predicate<"!Subtarget->hasNDD()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
def HasNOPL : Predicate<"Subtarget->hasNOPL()">;
@@ -100,7 +102,6 @@ def HasIFMA : Predicate<"Subtarget->hasIFMA()">;
def HasAVXIFMA : Predicate<"Subtarget->hasAVXIFMA()">;
def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
-def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasSHA512 : Predicate<"Subtarget->hasSHA512()">;
def HasSGX : Predicate<"Subtarget->hasSGX()">;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index df1f0b5b4ca7..e8a1a2b83886 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6655,49 +6655,51 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//
+// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
+// controlled by the SSE42 flag.
+//
// No CRC instructions have AVX equivalents
-// crc intrinsic instruction
-// This set of instructions are only rm, the only difference is the size
-// of r and m.
-class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
- RegisterClass RCIn, SDPatternOperator Int> :
- CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
- !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
- [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
- Sched<[WriteCRC32]>;
-
-class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
- X86MemOperand x86memop, SDPatternOperator Int> :
- CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
- !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
- [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
- Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
-
-let Constraints = "$src1 = $dst" in {
- def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
- int_x86_sse42_crc32_32_8>;
- def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
- int_x86_sse42_crc32_32_8>;
- def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
- int_x86_sse42_crc32_32_16>, OpSize16;
- def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
- int_x86_sse42_crc32_32_16>, OpSize16;
- def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
- int_x86_sse42_crc32_32_32>, OpSize32;
- def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
- int_x86_sse42_crc32_32_32>, OpSize32;
- def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
- int_x86_sse42_crc32_64_64>, REX_W;
- def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
- int_x86_sse42_crc32_64_64>, REX_W;
- let hasSideEffects = 0 in {
- let mayLoad = 1 in
- def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
- null_frag>, REX_W;
- def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
- null_frag>, REX_W;
- }
+class Crc32r<X86TypeInfo t, RegisterClass rc, SDPatternOperator node>
+ : ITy<0xF1, MRMSrcReg, t, (outs rc:$dst), (ins rc:$src1, t.RegClass:$src2),
+ "crc32", binop_args, [(set rc:$dst, (node rc:$src1, t.RegClass:$src2))]>,
+ Sched<[WriteCRC32]>, NoCD8 {
+ let Constraints = "$src1 = $dst";
+}
+
+class Crc32m<X86TypeInfo t, RegisterClass rc, SDPatternOperator node>
+ : ITy<0xF1, MRMSrcMem, t, (outs rc:$dst), (ins rc:$src1, t.MemOperand:$src2),
+ "crc32", binop_args, [(set rc:$dst, (node rc:$src1, (load addr:$src2)))]>,
+ Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>, NoCD8 {
+ let Constraints = "$src1 = $dst";
+}
+
+let Predicates = [HasCRC32, NoEGPR], OpMap = T8, OpPrefix = XD in {
+ def CRC32r32r8 : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>;
+ def CRC32r32m8 : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>;
+ def CRC32r32r16 : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16;
+ def CRC32r32m16 : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16;
+ def CRC32r32r32 : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32;
+ def CRC32r32m32 : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32;
+ def CRC32r64r64 : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>;
+ def CRC32r64m64 : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>;
+ def CRC32r64r8 : Crc32r<Xi8, GR64, null_frag>, REX_W;
+ let mayLoad = 1 in
+ def CRC32r64m8 : Crc32m<Xi8, GR64, null_frag>, REX_W;
+}
+
+let Predicates = [HasCRC32, HasEGPR, In64BitMode], OpMap = T_MAP4, OpEnc = EncEVEX in {
+ def CRC32r32r8_EVEX : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>;
+ def CRC32r32m8_EVEX : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>;
+ def CRC32r32r16_EVEX : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD;
+ def CRC32r32m16_EVEX : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD;
+ def CRC32r32r32_EVEX : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>;
+ def CRC32r32m32_EVEX : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>;
+ def CRC32r64r64_EVEX : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>;
+ def CRC32r64m64_EVEX : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>;
+ def CRC32r64r8_EVEX : Crc32r<Xi8, GR64, null_frag>, REX_W;
+ let mayLoad = 1 in
+ def CRC32r64m8_EVEX : Crc32m<Xi8, GR64, null_frag>, REX_W;
}
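
For orientation, a minimal sketch (not part of this patch) of the C-level SSE4.2 CRC32C intrinsics that lower to the int_x86_sse42_crc32_* intrinsics matched by the new Crc32r/Crc32m patterns; the function name, buffer handling, and the -msse4.2 / -mcrc32 build flags are illustrative assumptions only:

// Hedged sketch: accumulate a CRC32C over a byte buffer. The byte-wide
// intrinsic exercises the CRC32r32r8/CRC32r32m8 style patterns above.
#include <nmmintrin.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t crc32c_buffer(const uint8_t *data, size_t len) {
  uint32_t crc = ~0u;
  for (size_t i = 0; i != len; ++i)
    crc = _mm_crc32_u8(crc, data[i]);
  return ~crc;
}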
//===----------------------------------------------------------------------===//
@@ -7160,6 +7162,10 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128rm addr:$src)>;
}
+let Predicates = [HasAVXNECONVERT, NoVLX] in
+ def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF128rm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
@@ -7905,6 +7911,9 @@ let Predicates = [HasAVX2, NoVLX] in {
defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
+let Predicates = [HasAVXNECONVERT, NoVLX] in
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8bf16, v16bf16, loadv8bf16, loadv16bf16>;
+
//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
@@ -7927,6 +7936,9 @@ let Predicates = [HasAVX2, NoVLX] in {
defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
}
+let Predicates = [HasAVXNECONVERT, NoVLX] in
+ defm : vextract_lowering<"VEXTRACTI128", v16bf16, v8bf16>;
+
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
index efb58c6102dd..699e5847e63f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
@@ -446,11 +446,11 @@ let Predicates = [HasUSERMSR], mayLoad = 1 in {
}
let Predicates = [HasUSERMSR], mayStore = 1 in {
def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
- "uwrmsr\t{$src1, $src2|$src2, $src1}",
+ "uwrmsr\t{$src2, $src1|$src1, $src2}",
[(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8, XS;
def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm),
"uwrmsr\t{$src, $imm|$imm, $src}",
- [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7, XS, VEX;
+ [(int_x86_uwrmsr i64immSExt32_su:$imm, GR64:$src)]>, T_MAP7, XS, VEX;
}
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
index 9499753143d9..da85922a018d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrUtils.td
@@ -39,17 +39,19 @@ class PS { Prefix OpPrefix = PS; }
class PD { Prefix OpPrefix = PD; }
class XD { Prefix OpPrefix = XD; }
class XS { Prefix OpPrefix = XS; }
-class VEX { Encoding OpEnc = EncVEX; }
+class XOP { Encoding OpEnc = EncXOP; }
+class VEX { Encoding OpEnc = EncVEX; }
+class EVEX { Encoding OpEnc = EncEVEX; }
class WIG { bit IgnoresW = 1; }
// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX.
class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class VVVV { bit hasVEX_4V = 1; }
-class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
+class EVEX_NF { bit hasEVEX_NF = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
@@ -63,7 +65,7 @@ class EVEX_CD8<int esize, CD8VForm form> {
bits<3> CD8_Form = form.Value;
}
class NoCD8 { bits<7> CD8_Scale = 0; }
-class XOP { Encoding OpEnc = EncXOP; }
+
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
@@ -99,16 +101,24 @@ class DisassembleOnly {
bit ForceDisassemble = 1;
}
-
-// SchedModel info for instruction that loads one value and gets the second
-// (and possibly third) value from a register.
-// This is used for instructions that put the memory operands before other
-// uses.
-class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
- // Memory operand.
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- // Register reads (implicit or explicit).
- Sched.ReadAfterFold, Sched.ReadAfterFold]>;
+defvar unaryop_args = "$src1";
+defvar unaryop_ndd_args = "{$src1, $dst|$dst, $src1}";
+defvar binop_args = "{$src2, $src1|$src1, $src2}";
+defvar binop_ndd_args = "{$src2, $src1, $dst|$dst, $src1, $src2}";
+defvar tie_dst_src1 = "$src1 = $dst";
+
+// NDD - Helper for new data destination instructions
+class NDD<bit ndd> {
+ string Constraints = !if(!eq(ndd, 0), tie_dst_src1, "");
+ Encoding OpEnc = !if(!eq(ndd, 0), EncNormal, EncEVEX);
+ bit hasEVEX_B = ndd;
+ bit hasVEX_4V = ndd;
+ Map OpMap = !if(!eq(ndd, 0), OB, T_MAP4);
+}
+// NF - Helper for NF (no flags update) instructions
+class NF: T_MAP4, EVEX, EVEX_NF, NoCD8;
+// PL - Helper for promoted legacy instructions
+class PL: T_MAP4, EVEX, NoCD8, ExplicitEVEXPrefix;
//===----------------------------------------------------------------------===//
// X86 Type information definitions
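placeholder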
@@ -723,13 +733,6 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[UseSSE42]>;
-// CRC32I - SSE 4.2 CRC32 instructions.
-// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
-// controlled by the SSE42 flag.
-class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, XD, Requires<[HasCRC32]>;
-
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
@@ -957,15 +960,380 @@ class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
/// 2. Infers whether the instruction should have a 0x40 REX_W prefix.
/// 3. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
- string mnemonic, string args, list<dag> pattern>
- : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
- opcode{3}, opcode{2}, opcode{1},
- !if(!eq(typeinfo.HasEvenOpcode, 1), 0, opcode{0})}, f, outs, ins,
- !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
-
+class ITy<bits<8> o, Format f, X86TypeInfo t, dag outs, dag ins, string m,
+ string args, list<dag> p>
+ : I<{o{7}, o{6}, o{5}, o{4}, o{3}, o{2}, o{1},
+ !if(!eq(t.HasEvenOpcode, 1), 0, o{0})}, f, outs, ins,
+ !strconcat(m, "{", t.InstrSuffix, "}\t", args), p> {
let hasSideEffects = 0;
- let hasREX_W = typeinfo.HasREX_W;
+ let hasREX_W = t.HasREX_W;
}
-defvar binop_args = "{$src2, $src1|$src1, $src2}";
+// BinOpRR - Instructions that read "reg, reg".
+class BinOpRR<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p>
+ : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m,
+ args, p>, Sched<[WriteALU]>;
+// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only.
+class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpRR<o, m, binop_args, t, (outs),
+ [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>,
+ DefEFLAGS;
+// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F
+class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t>
+ : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly {
+ let Form = MRMSrcReg;
+}
+// BinOpRR_R - Instructions that read "reg, reg" and write "reg".
+class BinOpRR_R<bits<8> o, string m, X86TypeInfo t, bit ndd = 0>
+ : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t,
+ (outs t.RegClass:$dst), []>, NDD<ndd>;
+// BinOpRR_R_Rev - Reversed encoding of BinOpRR_R
+class BinOpRR_R_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0>
+ : BinOpRR_R<o, m, t, ndd>, DisassembleOnly {
+ let Form = MRMSrcReg;
+}
+// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS.
+class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+ : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t,
+ (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS, NDD<ndd>;
+// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF.
+class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0>
+ : BinOpRR_RF<o, m, t, null_frag, ndd>, DisassembleOnly {
+ let Form = MRMSrcReg;
+}
+// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write
+// EFLAGS.
+class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+ : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.RegClass:$src2,
+ EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<ndd> {
+ let SchedRW = [WriteADC];
+}
+// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF
+class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0>
+ : BinOpRRF_RF<o, m, t, null_frag, ndd>, DisassembleOnly {
+ let Form = MRMSrcReg;
+}
+
+// BinOpRM - Instructions that read "reg, [mem]".
+class BinOpRM<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p>
+ : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m,
+ args, p>,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> {
+ let mayLoad = 1;
+}
+// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only.
+class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node>
+ : BinOpRM<o, m, binop_args, t, (outs),
+ [(set EFLAGS, (node t.RegClass:$src1,
+ (t.LoadNode addr:$src2)))]>, DefEFLAGS;
+// BinOpRM_R - Instructions that read "reg, [mem]", and write "reg".
+class BinOpRM_R<bits<8> o, string m, X86TypeInfo t, bit ndd = 0>
+ : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst),
+ []>, NDD<ndd>;
+// BinOpRM_RF - Instructions that read "reg, [mem]", and write "reg", EFLAGS.
+class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+ : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1,
+ (t.LoadNode addr:$src2)))]>, DefEFLAGS, NDD<ndd>;
+// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write
+// EFLAGS.
+class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0>
+ : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>,
+ DefEFLAGS, UseEFLAGS, NDD<ndd> {
+ let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
+ // base, scale, index, offset, segment.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // implicit register read.
+ WriteADC.ReadAfterFold];
+}
+
+// BinOpRI - Instructions that read "reg, imm".
+class BinOpRI<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p>
+ : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m,
+ args, p>, Sched<[WriteALU]> {
+ let ImmT = t.ImmEncoding;
+}
+// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only.
+class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+ Format f>
+ : BinOpRI<o, m, binop_args, t, f, (outs),
+ [(set EFLAGS, (node t.RegClass:$src1,
+ t.ImmOperator:$src2))]>, DefEFLAGS;
+// BinOpRI_R - Instructions that read "reg, imm" and write "reg".
+class BinOpRI_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+ : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst),
+ []>, NDD<ndd>;
+// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS.
+class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, Format f, bit ndd = 0>
+ : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS, NDD<ndd>;
+// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write
+// EFLAGS.
+class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f, bit ndd = 0>
+ : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS,
+ (node t.RegClass:$src1, t.ImmOperator:$src2,
+ EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<ndd> {
+ let SchedRW = [WriteADC];
+}
+// BinOpRI8 - Instructions that read "reg, imm8".
+class BinOpRI8<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out>
+ : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m,
+ args, []>, Sched<[WriteALU]> {
+ let ImmT = Imm8;
+}
+// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only.
+class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f>
+ : BinOpRI8<o, m, binop_args, t, f, (outs)>, DefEFLAGS;
+// BinOpRI8_R - Instructions that read "reg, imm8" and write "reg".
+class BinOpRI8_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+ : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, NDD<ndd>;
+// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS.
+class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+ : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, DefEFLAGS, NDD<ndd>;
+// BinOpRI8F_RF - Instructions that read "reg, imm8", write "reg" and read/write
+// EFLAGS.
+class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+ : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS, NDD<ndd> {
+ let SchedRW = [WriteADC];
+}
+
+// BinOpMR - Instructions that read "[mem], reg".
+class BinOpMR<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p>
+ : ITy<o, MRMDestMem, t, out, (ins t.MemOperand:$src1, t.RegClass:$src2), m,
+ args, p> {
+ let mayLoad = 1;
+ let SchedRW = [WriteALU.Folded, WriteALU.ReadAfterFold];
+}
+// BinOpMR_R - Instructions that read "[mem], reg", and write "reg".
+class BinOpMR_R<bits<8> o, string m, X86TypeInfo t>
+ : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), []>, NDD<1>;
+// BinOpMR_RF - Instructions that read "[mem], reg", and write "reg", EFLAGS.
+class BinOpMR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1),
+ t.RegClass:$src2))]>, DefEFLAGS, NDD<1>;
+// BinOpMR_F - Instructions that read "[mem], reg" and write EFLAGS only.
+class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpMR<o, m, binop_args, t, (outs),
+ [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>,
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS;
+// BinOpMR_M - Instructions that read "[mem], reg" and write "[mem]".
+class BinOpMR_M<bits<8> o, string m, X86TypeInfo t>
+ : BinOpMR<o, m, binop_args, t, (outs), []>,
+ Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault]> {
+ let mayStore = 1;
+}
+// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS.
+class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpMR<o, m, binop_args, t, (outs),
+ [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1),
+ (implicit EFLAGS)]>,
+ Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>, // reg
+ DefEFLAGS {
+ let mayStore = 1;
+}
+// BinOpMRF_RF - Instructions that read "[mem], reg", write "reg" and
+// read/write EFLAGS.
+class BinOpMRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node (load addr:$src1),
+ t.RegClass:$src2, EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<1>,
+ Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>;
+// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+ : BinOpMR<o, m, binop_args, t, (outs),
+ [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS),
+ addr:$src1), (implicit EFLAGS)]>,
+ Sched<[WriteADCRMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold, // reg
+ WriteALU.ReadAfterFold]>, // EFLAGS
+ DefEFLAGS, UseEFLAGS {
+ let mayStore = 1;
+}
+
+// BinOpMI - Instructions that read "[mem], imm".
+class BinOpMI<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p>
+ : ITy<o, f, t, out, (ins t.MemOperand:$src1, t.ImmOperand:$src2), m,
+ args, p> {
+ let ImmT = t.ImmEncoding;
+ let mayLoad = 1;
+}
+// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only.
+class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+ Format f>
+ : BinOpMI<o, m, binop_args, t, f, (outs),
+ [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>,
+ Sched<[WriteALU.Folded]>, DefEFLAGS;
+// BinOpMI_R - Instructions that read "[mem], imm" and write "reg".
+class BinOpMI_R<bits<8> o, string m, X86TypeInfo t, Format f>
+ : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst), []>,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
+// BinOpMI_RF - Instructions that read "[mem], imm" and write "reg", EFLAGS.
+class BinOpMI_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+ Format f>
+ : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
+// BinOpMI_M - Instructions that read "[mem], imm" and write "[mem]".
+class BinOpMI_M<bits<8> o, string m, X86TypeInfo t, Format f>
+ : BinOpMI<o, m, binop_args, t, f, (outs), []>, Sched<[WriteALURMW]> {
+ let mayStore = 1;
+}
+// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS.
+class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, Format f>
+ : BinOpMI<o, m, binop_args, t, f, (outs),
+ [(store (node (t.VT (load addr:$src1)),
+ t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>,
+ Sched<[WriteALURMW]>, DefEFLAGS {
+ let mayStore = 1;
+}
+// BinOpMIF_RF - Instructions that read "[mem], imm", write "reg" and
+// read/write EFLAGS.
+class BinOpMIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+ : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node (t.VT (load addr:$src1)),
+ t.ImmOperator:$src2, EFLAGS))]>,
+ Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>, DefEFLAGS, UseEFLAGS, NDD<1>;
+// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+ : BinOpMI<o, m, binop_args, t, f, (outs),
+ [(store (node (t.VT (load addr:$src1)),
+ t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>,
+ Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
+ let mayStore = 1;
+}
+
+// BinOpMI8 - Instructions that read "[mem], imm8".
+class BinOpMI8<string m, string args, X86TypeInfo t, Format f, dag out>
+ : ITy<0x83, f, t, out, (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m,
+ args, []> {
+ let ImmT = Imm8;
+ let mayLoad = 1;
+}
+// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only.
+class BinOpMI8_F<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALU.Folded]>, DefEFLAGS;
+// BinOpMI8_R - Instructions that read "[mem], imm8" and write "reg".
+class BinOpMI8_R<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
+// BinOpMI8_RF - Instructions that read "[mem], imm8" and write "reg"/EFLAGS.
+class BinOpMI8_RF<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
+// BinOpMI8_M - Instructions that read "[mem], imm8" and write "[mem]".
+class BinOpMI8_M<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]> {
+ let mayStore = 1;
+}
+// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS.
+class BinOpMI8_MF<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]>, DefEFLAGS {
+ let mayStore = 1;
+}
+// BinOpMI8F_RF - Instructions that read "[mem], imm8", write "reg" and
+// read/write EFLAGS.
+class BinOpMI8F_RF<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>,
+ Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>, DefEFLAGS, UseEFLAGS, NDD<1>;
+// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMI8F_MF<string m, X86TypeInfo t, Format f>
+ : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
+ let mayStore = 1;
+}
+
+// BinOpAI - Instructions that read "a-reg imm" (Accumulator register).
+class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args>
+ : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>,
+ Sched<[WriteALU]> {
+ let ImmT = t.ImmEncoding;
+ let Uses = [areg];
+}
+// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only.
+class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args>
+ : BinOpAI<o, m, t, areg, args>, DefEFLAGS;
+
+// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS.
+class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
+ string args> : BinOpAI<o, m, t, areg, args> {
+ let Defs = [areg, EFLAGS];
+}
+// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write
+// EFLAGS.
+class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
+ string args> : BinOpAI<o, m, t, areg, args> {
+ let Uses = [areg, EFLAGS];
+ let Defs = [areg, EFLAGS];
+ let SchedRW = [WriteADC];
+}
+
+// UnaryOpR - Instructions that read "reg".
+class UnaryOpR<bits<8> o, Format f, string m, string args, X86TypeInfo t,
+ dag out, list<dag> p>
+ : ITy<o, f, t, out, (ins t.RegClass:$src1), m, args, p>, Sched<[WriteALU]>;
+// UnaryOpR_R - Instructions that read "reg" and write "reg".
+class UnaryOpR_R<bits<8> o, Format f, string m, X86TypeInfo t,
+ SDPatternOperator node, bit ndd = 0>
+ : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t,
+ (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, (node t.RegClass:$src1))]>, NDD<ndd>;
+// UnaryOpR_RF - Instructions that read "reg" and write "reg"/EFLAGS.
+class UnaryOpR_RF<bits<8> o, Format f, string m, X86TypeInfo t,
+ SDPatternOperator node, bit ndd = 0>
+ : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t,
+ (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, (node t.RegClass:$src1)),
+ (implicit EFLAGS)]>, DefEFLAGS, NDD<ndd>;
+
+// UnaryOpM - Instructions that read "[mem]".
+class UnaryOpM<bits<8> o, Format f, string m, string args, X86TypeInfo t,
+ dag out, list<dag> p>
+ : ITy<o, f, t, out, (ins t.MemOperand:$src1), m, args, p> {
+ let mayLoad = 1;
+}
+// UnaryOpM_R - Instructions that read "[mem]" and write "reg".
+class UnaryOpM_R<bits<8> o, Format f, string m, X86TypeInfo t,
+ SDPatternOperator node>
+ : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1)))]>,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
+// UnaryOpM_RF - Instructions that read "[mem]" and write "reg"/EFLAGS.
+class UnaryOpM_RF<bits<8> o, Format f, string m, X86TypeInfo t,
+ SDPatternOperator node>
+ : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst),
+ [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1)))]>,
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
+// UnaryOpM_M - Instructions that read "[mem]" and write "[mem]".
+class UnaryOpM_M<bits<8> o, Format f, string m, X86TypeInfo t,
+ SDPatternOperator node>
+ : UnaryOpM<o, f, m, unaryop_args, t, (outs),
+ [(store (node (t.LoadNode addr:$src1)), addr:$src1)]>,
+ Sched<[WriteALURMW]>{
+ let mayStore = 1;
+}
+// UnaryOpM_MF - Instructions that read "[mem]" and write "[mem]"/EFLAGS.
+class UnaryOpM_MF<bits<8> o, Format f, string m, X86TypeInfo t,
+ SDPatternOperator node>
+ : UnaryOpM<o, f, m, unaryop_args, t, (outs),
+ [(store (node (t.LoadNode addr:$src1)), addr:$src1),
+ (implicit EFLAGS)]>, Sched<[WriteALURMW]>, DefEFLAGS {
+ let mayStore = 1;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 70bd77bba03a..bbd19cf8d5b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -130,6 +130,9 @@ let Predicates = [HasAVX, NoVLX] in {
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
}
+let Predicates = [HasAVXNECONVERT, NoVLX] in
+ defm : subvec_zero_lowering<"DQA", VR128, v16bf16, v8bf16, sub_xmm>;
+
let Predicates = [HasVLX] in {
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
@@ -175,6 +178,12 @@ let Predicates = [HasFP16, HasVLX] in {
defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}
+let Predicates = [HasBF16, HasVLX] in {
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16bf16, v8bf16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v32bf16, v8bf16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v32bf16, v16bf16, sub_ymm>;
+}
+
class maskzeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
return isMaskZeroExtended(N);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 8a04987e768a..49631f38017a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1459,6 +1459,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
Args, CxtI);
}
+InstructionCost
+X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+ unsigned Opcode1, const SmallBitVector &OpcodeMask,
+ TTI::TargetCostKind CostKind) const {
+ if (isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask))
+ return TTI::TCC_Basic;
+ return InstructionCost::getInvalid();
+}
+
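
As a rough illustration of how the new hook is meant to be consumed (a sketch under assumptions, not code from this patch: the TargetTransformInfo wrapper is assumed to mirror the X86TTIImpl signature above, and the helper name and addsub-style mask are hypothetical):

// Hedged sketch: ask whether an alternating fadd/fsub pattern on VecTy is
// cheap. On X86 the override above returns TCC_Basic when
// isLegalAltInstr() accepts the opcode pair and mask, otherwise an invalid
// cost.
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool altShuffleIsCheap(const TargetTransformInfo &TTI,
                              FixedVectorType *VecTy) {
  // Even lanes use fadd (bit clear), odd lanes use fsub (bit set).
  SmallBitVector OpcodeMask(VecTy->getNumElements(), false);
  for (unsigned I = 1, E = VecTy->getNumElements(); I < E; I += 2)
    OpcodeMask.set(I);
  InstructionCost Cost =
      TTI.getAltInstrCost(VecTy, Instruction::FAdd, Instruction::FSub,
                          OpcodeMask, TTI::TCK_RecipThroughput);
  return Cost.isValid();
}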
InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *BaseTp,
ArrayRef<int> Mask,
@@ -3724,10 +3733,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v8i16, { 8, 13, 10, 16 } },
{ ISD::BITREVERSE, MVT::v32i8, { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert
{ ISD::BITREVERSE, MVT::v16i8, { 7, 7, 9, 13 } },
- { ISD::BSWAP, MVT::v4i64, { 5, 7, 5, 10 } },
- { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 3 } },
- { ISD::BSWAP, MVT::v8i32, { 5, 7, 5, 10 } },
- { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v4i64, { 5, 6, 5, 10 } },
+ { ISD::BSWAP, MVT::v2i64, { 2, 2, 1, 3 } },
+ { ISD::BSWAP, MVT::v8i32, { 5, 6, 5, 10 } },
+ { ISD::BSWAP, MVT::v4i32, { 2, 2, 1, 3 } },
{ ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
{ ISD::BSWAP, MVT::v8i16, { 2, 2, 1, 3 } },
{ ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert
@@ -3804,6 +3813,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } }, // sqrtpd
};
static const CostKindTblEntry SLMCostTbl[] = {
+ { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } },
{ ISD::FSQRT, MVT::f32, { 20, 20, 1, 1 } }, // sqrtss
{ ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } }, // sqrtps
{ ISD::FSQRT, MVT::f64, { 35, 35, 1, 1 } }, // sqrtsd
@@ -3842,9 +3854,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::v4i32, { 16, 20, 11, 21 } },
{ ISD::BITREVERSE, MVT::v8i16, { 16, 20, 11, 21 } },
{ ISD::BITREVERSE, MVT::v16i8, { 11, 12, 10, 16 } },
- { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } },
- { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } },
- { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 5 } },
+ { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 5 } },
+ { ISD::BSWAP, MVT::v8i16, { 2, 3, 1, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
{ ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
{ ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0fa0d240a548..07a3fff4f84b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -140,6 +140,11 @@ public:
TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
+ InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+ unsigned Opcode1,
+ const SmallBitVector &OpcodeMask,
+ TTI::TargetCostKind CostKind) const;
+
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1288597fc6b0..05003ec304ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -250,7 +250,7 @@ bool XCoreDAGToDAGISel::tryBRIND(SDNode *N) {
SDValue Addr = N->getOperand(1);
if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
return false;
- unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+ unsigned IntNo = Addr->getConstantOperandVal(1);
if (IntNo != Intrinsic::xcore_checkevent)
return false;
SDValue nextAddr = Addr->getOperand(2);
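
For readers unfamiliar with the helper being adopted here, a small sketch (the wrapper function name is hypothetical) showing that SDNode::getConstantOperandVal is shorthand for the cast-and-getZExtValue pattern it replaces:

// Hedged sketch: the two spellings are equivalent; both assert that the
// requested operand is a ConstantSDNode.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static unsigned intrinsicID(const SDNode *N, unsigned OpIdx) {
  unsigned Old = cast<ConstantSDNode>(N->getOperand(OpIdx))->getZExtValue();
  unsigned New = N->getConstantOperandVal(OpIdx);
  (void)Old;
  return New;
}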
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 7736adab19e8..18feeaadb03c 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -767,7 +767,7 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
// An index of zero corresponds to the current function's frame address.
// An index of one to the parent's frame address, and so on.
// Depths > 0 not supported yet!
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ if (Op.getConstantOperandVal(0) > 0)
return SDValue();
MachineFunction &MF = DAG.getMachineFunction();
@@ -783,7 +783,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
// An index of zero corresponds to the current function's return address.
// An index of one to the parent's return address, and so on.
// Depths > 0 not supported yet!
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ if (Op.getConstantOperandVal(0) > 0)
return SDValue();
MachineFunction &MF = DAG.getMachineFunction();
@@ -905,7 +905,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue XCoreTargetLowering::
LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
switch (IntNo) {
case Intrinsic::xcore_crc8:
EVT VT = Op.getValueType();
@@ -1497,7 +1497,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::INTRINSIC_VOID:
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
case Intrinsic::xcore_outt:
case Intrinsic::xcore_outct:
case Intrinsic::xcore_chkct: {
@@ -1733,30 +1733,30 @@ void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case ISD::INTRINSIC_W_CHAIN:
{
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch (IntNo) {
- case Intrinsic::xcore_getts:
- // High bits are known to be zero.
- Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
- Known.getBitWidth() - 16);
- break;
- case Intrinsic::xcore_int:
- case Intrinsic::xcore_inct:
- // High bits are known to be zero.
- Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
- Known.getBitWidth() - 8);
- break;
- case Intrinsic::xcore_testct:
- // Result is either 0 or 1.
- Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
- Known.getBitWidth() - 1);
- break;
- case Intrinsic::xcore_testwct:
- // Result is in the range 0 - 4.
- Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
- Known.getBitWidth() - 3);
- break;
- }
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ case Intrinsic::xcore_getts:
+ // High bits are known to be zero.
+ Known.Zero =
+ APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 16);
+ break;
+ case Intrinsic::xcore_int:
+ case Intrinsic::xcore_inct:
+ // High bits are known to be zero.
+ Known.Zero =
+ APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 8);
+ break;
+ case Intrinsic::xcore_testct:
+ // Result is either 0 or 1.
+ Known.Zero =
+ APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 1);
+ break;
+ case Intrinsic::xcore_testwct:
+ // Result is in the range 0 - 4.
+ Known.Zero =
+ APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 3);
+ break;
+ }
}
break;
}
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp
index 27d168020ce6..ce640f5b8d45 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -348,11 +348,7 @@ StringRef ARM::getArchExtName(uint64_t ArchExtKind) {
}
static bool stripNegationPrefix(StringRef &Name) {
- if (Name.starts_with("no")) {
- Name = Name.substr(2);
- return true;
- }
- return false;
+ return Name.consume_front("no");
}
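
A short sketch (illustrative, not from the patch) of the StringRef::consume_front contract the one-liner relies on: it strips the prefix in place and returns whether it did, which is exactly what the removed starts_with/substr code expressed:

// Hedged demo of consume_front semantics with made-up extension names.
#include "llvm/ADT/StringRef.h"
#include <cassert>
using namespace llvm;

static void consumeFrontDemo() {
  StringRef Ext = "nocrypto";
  bool Negated = Ext.consume_front("no");
  assert(Negated && Ext == "crypto"); // prefix stripped, true returned

  StringRef Plain = "crypto";
  assert(!Plain.consume_front("no") && Plain == "crypto"); // unchanged
  (void)Negated;
}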
StringRef ARM::getArchExtFeature(StringRef ArchExt) {
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
index 11c5000acc07..2e08c7b12d9d 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
@@ -1160,7 +1160,7 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 25:
CPU = "znver3";
*Type = X86::AMDFAM19H;
- if ((Model >= 0x00 && Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
+ if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
(Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
(Model >= 0x50 && Model <= 0x5f)) {
// Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
index d475650c2d18..e93502187b54 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp
@@ -1208,8 +1208,7 @@ static VersionTuple parseVersionFromName(StringRef Name) {
VersionTuple Triple::getEnvironmentVersion() const {
StringRef EnvironmentName = getEnvironmentName();
StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment());
- if (EnvironmentName.starts_with(EnvironmentTypeName))
- EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size());
+ EnvironmentName.consume_front(EnvironmentTypeName);
return parseVersionFromName(EnvironmentName);
}
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
index 085554f18b2b..d46ff07ec734 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -162,8 +162,6 @@ constexpr FeatureBitset FeaturesAlderlake =
constexpr FeatureBitset FeaturesSierraforest =
FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureUINTR |
FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8;
-constexpr FeatureBitset FeaturesGrandridge =
- FeaturesSierraforest | FeatureRAOINT;
constexpr FeatureBitset FeaturesArrowlakeS = FeaturesSierraforest |
FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4;
constexpr FeatureBitset FeaturesPantherlake =
@@ -369,7 +367,7 @@ constexpr ProcInfo Processors[] = {
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
- { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesGrandridge, 'p', false },
+ { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Granite Rapids microarchitecture based processors.
{ {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false },
// Granite Rapids D microarchitecture based processors.
diff --git a/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp b/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp
index 2f82bc03480b..aea772dbc4be 100644
--- a/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp
+++ b/contrib/llvm-project/llvm/lib/TextAPI/TextStubV5.cpp
@@ -201,8 +201,9 @@ Expected<StubT> getRequiredValue(
template <typename JsonT, typename StubT = JsonT>
Expected<StubT> getRequiredValue(
TBDKey Key, const Object *Obj,
- std::function<std::optional<JsonT>(const Object *, StringRef)> GetValue,
- StubT DefaultValue, std::function<std::optional<StubT>(JsonT)> Validate) {
+ std::function<std::optional<JsonT>(const Object *, StringRef)> const
+ GetValue,
+ StubT DefaultValue, function_ref<std::optional<StubT>(JsonT)> Validate) {
std::optional<JsonT> Val = GetValue(Obj, Keys[Key]);
if (!Val)
return DefaultValue;
@@ -215,7 +216,7 @@ Expected<StubT> getRequiredValue(
}
Error collectFromArray(TBDKey Key, const Object *Obj,
- std::function<void(StringRef)> Append,
+ function_ref<void(StringRef)> Append,
bool IsRequired = false) {
const auto *Values = Obj->getArray(Keys[Key]);
if (!Values) {
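
A brief sketch (names and include path assumed) of why llvm::function_ref is preferred over std::function for callback parameters like these: it is a non-owning view of a callable, so it carries no ownership or allocation overhead and documents that the callee does not retain the callable beyond the call:

// Hedged sketch of a function_ref-taking collector and a caller.
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
#include <string>
#include <vector>
using namespace llvm;

static void collectNames(function_ref<void(StringRef)> Append) {
  // The callable is only invoked here, never stored, so a view suffices.
  Append("foo");
  Append("bar");
}

static std::vector<std::string> demo() {
  std::vector<std::string> Out;
  collectNames([&Out](StringRef S) { Out.push_back(S.str()); });
  return Out;
}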
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index f37b4dc938d3..529f7309a1a2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -2951,9 +2951,11 @@ void coro::salvageDebugInfo(
// dbg.declare does.
if (isa<DbgDeclareInst>(DVI)) {
std::optional<BasicBlock::iterator> InsertPt;
- if (auto *I = dyn_cast<Instruction>(Storage))
+ if (auto *I = dyn_cast<Instruction>(Storage)) {
InsertPt = I->getInsertionPointAfterDef();
- else if (isa<Argument>(Storage))
+ if (!OptimizeFrame && I->getDebugLoc())
+ DVI.setDebugLoc(I->getDebugLoc());
+ } else if (isa<Argument>(Storage))
InsertPt = F->getEntryBlock().begin();
if (InsertPt)
DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7c277518b21d..7ebf265e17ba 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -76,6 +76,7 @@ STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
@@ -1279,6 +1280,45 @@ static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
}
}
+/// Deduce noundef attributes for the SCC.
+static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
+ // Check each function in turn, determining which functions return noundef
+ // values.
+ for (Function *F : SCCNodes) {
+ // Already noundef.
+ if (F->getAttributes().hasRetAttr(Attribute::NoUndef))
+ continue;
+
+ // We can infer and propagate function attributes only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F->hasExactDefinition())
+ return;
+
+ // MemorySanitizer assumes that the definition and declaration of a
+ // function will be consistent. A function with sanitize_memory attribute
+ // should be skipped from inference.
+ if (F->hasFnAttribute(Attribute::SanitizeMemory))
+ continue;
+
+ if (F->getReturnType()->isVoidTy())
+ continue;
+
+ if (all_of(*F, [](BasicBlock &BB) {
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
+ // TODO: perform context-sensitive analysis?
+ return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue());
+ }
+ return true;
+ })) {
+ F->addRetAttr(Attribute::NoUndef);
+ ++NumNoUndefReturn;
+ Changed.insert(F);
+ }
+ }
+}
+
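
To see the kind of return value the new inference accepts, a hedged sketch (helper name hypothetical) that builds a trivial function and applies the same isGuaranteedNotToBeUndefOrPoison check used above to its return value:

// Hedged sketch: a function returning a plain constant would be marked
// noundef by the new inference, since its return value is provably neither
// undef nor poison.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static bool wouldGetNoUndefRet(LLVMContext &Ctx) {
  Module M("demo", Ctx);
  auto *FTy = FunctionType::get(Type::getInt32Ty(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::InternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  ReturnInst *Ret = B.CreateRet(B.getInt32(42));
  return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue());
}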
namespace {
/// Collects a set of attribute inference requests and performs them all in one
@@ -1629,7 +1669,10 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
for (auto &I : BB.instructionsWithoutDebug())
if (auto *CB = dyn_cast<CallBase>(&I)) {
Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee == F || !Callee->doesNotRecurse())
+ if (!Callee || Callee == F ||
+ (!Callee->doesNotRecurse() &&
+ !(Callee->isDeclaration() &&
+ Callee->hasFnAttribute(Attribute::NoCallback))))
// Function calls a potentially recursive function.
return;
}
@@ -1785,6 +1828,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addWillReturn(Nodes.SCCNodes, Changed);
+ addNoUndefAttrs(Nodes.SCCNodes, Changed);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 719a2678fc18..556fde37efeb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
assert(NotLHS != nullptr && NotRHS != nullptr &&
"isFreeToInvert desynced with getFreelyInverted");
Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
- return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2),
- LHSPlusRHS);
+ return BinaryOperator::CreateSub(
+ ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS);
}
}
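
A hedged illustration (not from the patch) of what ConstantInt::getSigned buys for a negative constant: the 64-bit input is treated as a signed value for the destination width, rather than relying on truncation of the zero-extended bit pattern of -2:

// Hedged sketch: -2 as an i8 constant via the signed entry point.
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static ConstantInt *minusTwo(LLVMContext &Ctx) {
  Type *I8 = Type::getInt8Ty(Ctx);
  auto *C = cast<ConstantInt>(ConstantInt::getSigned(I8, -2));
  // C->getSExtValue() == -2 and C->getZExtValue() == 0xFE.
  return C;
}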
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 63b1e0f64a88..c03f50d75814 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3513,9 +3513,13 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Op0, C);
// ((B | C) & A) | B -> B | (A & C)
- if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
+ if (match(Op0, m_c_And(m_c_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
+ // B | ((B | C) & A) -> B | (A & C)
+ if (match(Op1, m_c_And(m_c_Or(m_Specific(Op0), m_Value(C)), m_Value(A))))
+ return BinaryOperator::CreateOr(Op0, Builder.CreateAnd(A, C));
+
if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
return DeMorgan;
@@ -3872,6 +3876,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
}
}
+ // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2
+ if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) &&
+ match(Op1, m_APInt(C2))) {
+ KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I);
+ if ((KnownX.One & *C2) == *C2)
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
+ }
+
return nullptr;
}
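
A quick sanity check of the algebra behind the new "(X & C1) | C2" fold (concrete values standing in for the KnownBits query, helper name illustrative): when every bit of C2 is already known to be set in X, both sides agree, because X & (C1 | C2) = (X & C1) | (X & C2) = (X & C1) | C2.

// Hedged demo of the identity on a few values with the C2 bits forced on.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static void orOfMaskedDemo() {
  const uint32_t C1 = 0xFF00, C2 = 0x0030;
  for (uint32_t X : {0x1234u | C2, 0xABF0u | C2, C2})
    assert(((X & C1) | C2) == (X & (C1 | C2)));
}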
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3b7fe7fa2266..43d4496571be 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3850,6 +3850,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (Callee->hasFnAttribute("thunk"))
return false;
+ // If this is a call to a naked function, the assembly might be using an
+ // argument or otherwise relying on the frame layout, so the function
+ // prototype will not match.
+ if (Callee->hasFnAttribute(Attribute::Naked))
+ return false;
+
// If this is a musttail call, the callee's prototype must match the caller's
// prototype with the exception of pointee types. The code below doesn't
// implement that, so we can't do this transform.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52..3875e59c3ede 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -111,8 +111,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- GV->getValueType() != GEP->getSourceElementType() ||
- !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
+ !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
@@ -128,8 +128,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// the simple index into a single-dimensional array.
//
// Require: GEP GV, 0, i {{, constant indices}}
- if (GEP->getNumOperands() < 3 ||
- !isa<ConstantInt>(GEP->getOperand(1)) ||
+ if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
!cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
isa<Constant>(GEP->getOperand(2)))
return nullptr;
@@ -142,15 +141,18 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!Idx) return nullptr; // Variable index.
+ if (!Idx)
+ return nullptr; // Variable index.
uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
+ if ((unsigned)IdxVal != IdxVal)
+ return nullptr; // Too large array index.
if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements()) return nullptr;
+ if (IdxVal >= ATy->getNumElements())
+ return nullptr;
EltTy = ATy->getElementType();
} else {
return nullptr; // Unknown type.
@@ -191,7 +193,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
Constant *Elt = Init->getAggregateElement(i);
- if (!Elt) return nullptr;
+ if (!Elt)
+ return nullptr;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty()) {
@@ -214,16 +217,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
// undef in the middle of the range.
- if (TrueRangeEnd == (int)i-1)
+ if (TrueRangeEnd == (int)i - 1)
TrueRangeEnd = i;
- if (FalseRangeEnd == (int)i-1)
+ if (FalseRangeEnd == (int)i - 1)
FalseRangeEnd = i;
continue;
}
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
- if (!isa<ConstantInt>(C)) return nullptr;
+ if (!isa<ConstantInt>(C))
+ return nullptr;
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
@@ -233,7 +237,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (IsTrueForElt) {
// Update the TrueElement state machine.
if (FirstTrueElement == Undefined)
- FirstTrueElement = TrueRangeEnd = i; // First true element.
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
else {
// Update double-compare state machine.
if (SecondTrueElement == Undefined)
@@ -242,7 +246,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SecondTrueElement = Overdefined;
// Update range state machine.
- if (TrueRangeEnd == (int)i-1)
+ if (TrueRangeEnd == (int)i - 1)
TrueRangeEnd = i;
else
TrueRangeEnd = Overdefined;
@@ -259,7 +263,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SecondFalseElement = Overdefined;
// Update range state machine.
- if (FalseRangeEnd == (int)i-1)
+ if (FalseRangeEnd == (int)i - 1)
FalseRangeEnd = i;
else
FalseRangeEnd = Overdefined;
@@ -348,7 +352,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
- Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *SecondFalseIdx =
+ ConstantInt::get(Idx->getType(), SecondFalseElement);
Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
@@ -365,8 +370,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Idx = Builder.CreateAdd(Idx, Offs);
}
- Value *End = ConstantInt::get(Idx->getType(),
- TrueRangeEnd-FirstTrueElement+1);
+ Value *End =
+ ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
@@ -380,8 +385,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Idx = Builder.CreateAdd(Idx, Offs);
}
- Value *End = ConstantInt::get(Idx->getType(),
- FalseRangeEnd-FirstFalseElement);
+ Value *End =
+ ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement);
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
@@ -4624,27 +4629,35 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
- if (BO0 && isa<OverflowingBinaryOperator>(BO0))
- NoOp0WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
- if (BO1 && isa<OverflowingBinaryOperator>(BO1))
- NoOp1WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
+ bool Op0HasNUW = false, Op1HasNUW = false;
+ bool Op0HasNSW = false, Op1HasNSW = false;
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
+ bool &HasNSW, bool &HasNUW) -> bool {
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ HasNUW = BO.hasNoUnsignedWrap();
+ HasNSW = BO.hasNoSignedWrap();
+ return ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && HasNUW) ||
+ (CmpInst::isSigned(Pred) && HasNSW);
+ } else if (BO.getOpcode() == Instruction::Or) {
+ HasNUW = true;
+ HasNSW = true;
+ return true;
+ } else {
+ return false;
+ }
+ };
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Add) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
+
+ if (BO0) {
+ match(BO0, m_AddLike(m_Value(A), m_Value(B)));
+ NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
}
- if (BO1 && BO1->getOpcode() == Instruction::Add) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
+ if (BO1) {
+ match(BO1, m_AddLike(m_Value(C), m_Value(D)));
+ NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
}
// icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
@@ -4764,17 +4777,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
APInt AP2Abs = AP2->abs();
if (AP1Abs.uge(AP2Abs)) {
APInt Diff = *AP1 - *AP2;
- bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
- bool HasNSW = BO0->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
return new ICmpInst(Pred, NewAdd, C);
} else {
APInt Diff = *AP2 - *AP1;
- bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
- bool HasNSW = BO1->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW);
return new ICmpInst(Pred, A, NewAdd);
}
}
@@ -4868,16 +4879,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// if Z != 0 and nsw(X * Z) and nsw(Y * Z)
// X * Z eq/ne Y * Z -> X eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
- BO1->hasNoSignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
return new ICmpInst(Pred, X, Y);
} else
NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// If Z != 0 and nuw(X * Z) and nuw(Y * Z)
// X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
- BO1->hasNoUnsignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
return new ICmpInst(Pred, X, Y);
}
}
@@ -4966,7 +4975,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::SDiv:
- if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
+ if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) ||
+ !BO0->isExact() || !BO1->isExact())
break;
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
@@ -4976,8 +4986,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::Shl: {
- bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
- bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ bool NUW = Op0HasNUW && Op1HasNUW;
+ bool NSW = Op0HasNSW && Op1HasNSW;
if (!NUW && !NSW)
break;
if (!NSW && I.isSigned())
@@ -5029,10 +5039,10 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
/// Fold icmp Pred min|max(X, Y), Z.
-Instruction *
-InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I,
- MinMaxIntrinsic *MinMax, Value *Z,
- ICmpInst::Predicate Pred) {
+Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I,
+ MinMaxIntrinsic *MinMax,
+ Value *Z,
+ ICmpInst::Predicate Pred) {
Value *X = MinMax->getLHS();
Value *Y = MinMax->getRHS();
if (ICmpInst::isSigned(Pred) && !MinMax->isSigned())
@@ -5161,24 +5171,6 @@ InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I,
return nullptr;
}
-Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) {
- ICmpInst::Predicate Pred = Cmp.getPredicate();
- Value *Lhs = Cmp.getOperand(0);
- Value *Rhs = Cmp.getOperand(1);
-
- if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) {
- if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred))
- return Res;
- }
-
- if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) {
- if (Instruction *Res = foldICmpWithMinMaxImpl(
- Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred)))
- return Res;
- }
-
- return nullptr;
-}
// Canonicalize checking for a power-of-2-or-zero value:
static Instruction *foldICmpPow2Test(ICmpInst &I,
@@ -6843,6 +6835,34 @@ static Instruction *foldReductionIdiom(ICmpInst &I,
return nullptr;
}
+// This helper will be called with icmp operands in both orders.
+Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred,
+ Value *Op0, Value *Op1,
+ ICmpInst &CxtI) {
+ // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
+ if (auto *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI))
+ return NI;
+
+ if (auto *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
+ return NI;
+
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0))
+ if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred))
+ return Res;
+
+ {
+ Value *X;
+ const APInt *C;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
+ return foldICmpAddOpConst(X, *C, Pred);
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6966,20 +6986,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
return Res;
- // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
- if (auto *GEP = dyn_cast<GEPOperator>(Op0))
- if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
- return NI;
- if (auto *GEP = dyn_cast<GEPOperator>(Op1))
- if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
- return NI;
-
- if (auto *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I))
- return NI;
- if (auto *SI = dyn_cast<SelectInst>(Op1))
- if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I))
- return NI;
+ if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I))
+ return Res;
+ if (Instruction *Res =
+ foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I))
+ return Res;
// In case of a comparison with two select instructions having the same
// condition, check whether one of the resulting branches can be simplified.
@@ -7030,9 +7041,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *R = foldICmpWithCastOp(I))
return R;
- if (Instruction *Res = foldICmpWithMinMax(I))
- return Res;
-
{
Value *X, *Y;
// Transform (X & ~Y) == 0 --> (X & Y) != 0
@@ -7134,18 +7142,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
!ACXI->isWeak())
return ExtractValueInst::Create(ACXI, 1);
- {
- Value *X;
- const APInt *C;
- // icmp X+Cst, X
- if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
- return foldICmpAddOpConst(X, *C, I.getPredicate());
-
- // icmp X, X+Cst
- if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
- return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
- }
-
if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
return Res;
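
The InstCombineCompares.cpp hunks above switch the operand matching to m_AddLike and let an `or` act as an add carrying both no-wrap flags. A standalone sketch of the arithmetic fact this rests on for operands with no common bits (a brute-force check over 8-bit values; illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Check every disjoint 8-bit operand pair: "or" equals "add", and the add
  // wraps neither as unsigned (nuw) nor as signed (nsw) arithmetic.
  for (int a = 0; a < 256; ++a) {
    for (int b = 0; b < 256; ++b) {
      if ((a & b) != 0)
        continue;                       // only operands with no common bits
      assert((a | b) == a + b);         // the or behaves exactly like an add
      assert(a + b <= 0xff);            // no unsigned wrap
      int sa = (int8_t)a, sb = (int8_t)b;
      assert(sa + sb >= -128 && sa + sb <= 127); // no signed wrap
    }
  }
}
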
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9e76a0cf17b1..bdaf7550b4b4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -648,9 +648,8 @@ public:
Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
const APInt &C);
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
- Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax,
- Value *Z, ICmpInst::Predicate Pred);
- Instruction *foldICmpWithMinMax(ICmpInst &Cmp);
+ Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax,
+ Value *Z, ICmpInst::Predicate Pred);
Instruction *foldICmpEquality(ICmpInst &Cmp);
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
Instruction *foldSignBitTest(ICmpInst &I);
@@ -708,6 +707,8 @@ public:
const APInt &C);
Instruction *foldICmpBitCast(ICmpInst &Cmp);
Instruction *foldICmpWithTrunc(ICmpInst &Cmp);
+ Instruction *foldICmpCommutative(ICmpInst::Predicate Pred, Value *Op0,
+ Value *Op1, ICmpInst &CxtI);
// Helpers of visitSelectInst().
Instruction *foldSelectOfBools(SelectInst &SI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 20bf00344b14..ab55f235920a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1171,14 +1171,15 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return nullptr;
}
-static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
- InstCombinerImpl &IC) {
+static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal,
+ InstCombinerImpl &IC) {
Value *LHS, *RHS;
// TODO: What to do with pointer min/max patterns?
- if (!Sel.getType()->isIntOrIntVectorTy())
+ if (!TrueVal->getType()->isIntOrIntVectorTy())
return nullptr;
- SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor;
+ SelectPatternFlavor SPF =
+ matchDecomposedSelectPattern(&Cmp, TrueVal, FalseVal, LHS, RHS).Flavor;
if (SPF == SelectPatternFlavor::SPF_ABS ||
SPF == SelectPatternFlavor::SPF_NABS) {
if (!Cmp.hasOneUse() && !RHS->hasOneUse())
@@ -1188,13 +1189,13 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
match(RHS, m_NSWNeg(m_Specific(LHS)));
Constant *IntMinIsPoisonC =
- ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
+ ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison);
Instruction *Abs =
IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
if (SPF == SelectPatternFlavor::SPF_NABS)
- return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
- return IC.replaceInstUsesWith(Sel, Abs);
+ return IC.Builder.CreateNeg(Abs); // Always without NSW flag!
+ return Abs;
}
if (SelectPatternResult::isMinOrMax(SPF)) {
@@ -1215,8 +1216,7 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
default:
llvm_unreachable("Unexpected SPF");
}
- return IC.replaceInstUsesWith(
- Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS));
+ return IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS);
}
return nullptr;
@@ -1677,8 +1677,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI))
return NewSel;
- if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this))
- return NewSPF;
+ if (Value *V =
+ canonicalizeSPF(*ICI, SI.getTrueValue(), SI.getFalseValue(), *this))
+ return replaceInstUsesWith(SI, V);
if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder))
return replaceInstUsesWith(SI, V);
@@ -2363,6 +2364,9 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel,
Value *FVal = Sel.getFalseValue();
Type *SelType = Sel.getType();
+ if (ICmpInst::makeCmpResultType(TVal->getType()) != Cond->getType())
+ return nullptr;
+
// Match select ?, TC, FC where the constants are equal but negated.
// TODO: Generalize to handle a negated variable operand?
const APFloat *TC, *FC;
@@ -3790,5 +3794,50 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *I = foldBitCeil(SI, Builder))
return I;
+ // Fold:
+ // (select A && B, T, F) -> (select A, (select B, T, F), F)
+ // (select A || B, T, F) -> (select A, T, (select B, T, F))
+ // if (select B, T, F) is foldable.
+ // TODO: preserve FMF flags
+ auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,
+ Value *B) -> Instruction * {
+ if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,
+ SQ.getWithInstruction(&SI)))
+ return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);
+
+ // Is (select B, T, F) a SPF?
+ if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(B))
+ if (Value *V = canonicalizeSPF(*Cmp, TrueVal, FalseVal, *this))
+ return SelectInst::Create(A, IsAnd ? V : TrueVal,
+ IsAnd ? FalseVal : V);
+ }
+
+ return nullptr;
+ };
+
+ Value *LHS, *RHS;
+ if (match(CondVal, m_And(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS))
+ return I;
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, RHS, LHS))
+ return I;
+ } else if (match(CondVal, m_Or(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS))
+ return I;
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, RHS, LHS))
+ return I;
+ } else {
+ // We cannot swap the operands of logical and/or.
+ // TODO: Can we swap the operands by inserting a freeze?
+ if (match(CondVal, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS))
+ return I;
+ } else if (match(CondVal, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) {
+ if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS))
+ return I;
+ }
+ }
+
return nullptr;
}
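
The new FoldSelectWithAndOrCond lambda in the InstCombineSelect.cpp hunk relies on the identity select (A && B), T, F == select A, (select B, T, F), F and its dual for ||. A minimal truth-table check in plain C++ (it deliberately ignores the poison and FMF caveats the TODO mentions):

#include <cassert>

int main() {
  const int T = 10, F = 20;
  for (bool A : {false, true}) {
    for (bool B : {false, true}) {
      int AndForm = (A && B) ? T : F;
      int AndSplit = A ? (B ? T : F) : F; // select A, (select B, T, F), F
      assert(AndForm == AndSplit);
      int OrForm = (A || B) ? T : F;
      int OrSplit = A ? T : (B ? T : F);  // select A, T, (select B, T, F)
      assert(OrForm == OrSplit);
    }
  }
}
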
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7f5a7b666903..351fc3b0174f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2469,31 +2469,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
DL.getIndexSizeInBits(AS)) {
uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
- bool Matched = false;
- uint64_t C;
- Value *V = nullptr;
if (TyAllocSize == 1) {
- V = GEP.getOperand(1);
- Matched = true;
- } else if (match(GEP.getOperand(1),
- m_AShr(m_Value(V), m_ConstantInt(C)))) {
- if (TyAllocSize == 1ULL << C)
- Matched = true;
- } else if (match(GEP.getOperand(1),
- m_SDiv(m_Value(V), m_ConstantInt(C)))) {
- if (TyAllocSize == C)
- Matched = true;
+ // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
+ // but only if the result pointer is only used as if it were an integer,
+ // or both point to the same underlying object (otherwise provenance is
+ // not necessarily retained).
+ Value *X = GEP.getPointerOperand();
+ Value *Y;
+ if (match(GEP.getOperand(1),
+ m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+ GEPType == Y->getType()) {
+ bool HasSameUnderlyingObject =
+ getUnderlyingObject(X) == getUnderlyingObject(Y);
+ bool Changed = false;
+ GEP.replaceUsesWithIf(Y, [&](Use &U) {
+ bool ShouldReplace = HasSameUnderlyingObject ||
+ isa<ICmpInst>(U.getUser()) ||
+ isa<PtrToIntInst>(U.getUser());
+ Changed |= ShouldReplace;
+ return ShouldReplace;
+ });
+ return Changed ? &GEP : nullptr;
+ }
+ } else {
+ // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
+ Value *V;
+ if ((has_single_bit(TyAllocSize) &&
+ match(GEP.getOperand(1),
+ m_Exact(m_AShr(m_Value(V),
+ m_SpecificInt(countr_zero(TyAllocSize)))))) ||
+ match(GEP.getOperand(1),
+ m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ Builder.getInt8Ty(), GEP.getPointerOperand(), V);
+ NewGEP->setIsInBounds(GEP.isInBounds());
+ return NewGEP;
+ }
}
-
- // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
- // only if both point to the same underlying object (otherwise provenance
- // is not necessarily retained).
- Value *Y;
- Value *X = GEP.getOperand(0);
- if (Matched &&
- match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
- getUnderlyingObject(X) == getUnderlyingObject(Y))
- return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
}
}
// We do not handle pointer-vector geps here.
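
The visitGetElementPtrInst hunk canonicalizes gep T* X, V/sizeof(T) (with an exact ashr or sdiv) into a byte-addressed gep i8* X, V. A small sketch of the pointer-arithmetic equivalence it depends on, using a hypothetical int buffer (not from the patch):

#include <cassert>
#include <cstddef>

int main() {
  int buf[16] = {};
  // V must be an exact multiple of sizeof(int) for the rewrite to apply.
  std::ptrdiff_t V = 3 * (std::ptrdiff_t)sizeof(int);
  int *Typed = buf + V / (std::ptrdiff_t)sizeof(int); // gep i32* buf, V/4
  int *Bytes = (int *)((char *)buf + V);              // gep i8*  buf, V
  assert(Typed == Bytes);
}
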
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 899d7e0a11e6..06c87bd6dc37 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -366,6 +366,13 @@ struct Decomposition {
append_range(Vars, Other.Vars);
}
+ void sub(const Decomposition &Other) {
+ Decomposition Tmp = Other;
+ Tmp.mul(-1);
+ add(Tmp.Offset);
+ append_range(Vars, Tmp.Vars);
+ }
+
void mul(int64_t Factor) {
Offset = multiplyWithOverflow(Offset, Factor);
for (auto &Var : Vars)
@@ -569,10 +576,12 @@ static Decomposition decompose(Value *V,
return Result;
}
- if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI))
- return {-1 * CI->getSExtValue(), {{1, Op0}}};
- if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
- return {0, {{1, Op0}, {-1, Op1}}};
+ if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) {
+ auto ResA = decompose(Op0, Preconditions, IsSigned, DL);
+ auto ResB = decompose(Op1, Preconditions, IsSigned, DL);
+ ResA.sub(ResB);
+ return ResA;
+ }
return {V, IsKnownNonNegative};
}
@@ -1010,22 +1019,14 @@ void State::addInfoFor(BasicBlock &BB) {
continue;
}
- if (match(&I, m_Intrinsic<Intrinsic::ssub_with_overflow>())) {
- WorkList.push_back(
- FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
- continue;
- }
-
- if (isa<MinMaxIntrinsic>(&I)) {
- WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
- continue;
- }
-
- Value *A, *B;
- CmpInst::Predicate Pred;
- // For now, just handle assumes with a single compare as condition.
- if (match(&I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_Value(A), m_Value(B))))) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
+ switch (ID) {
+ case Intrinsic::assume: {
+ Value *A, *B;
+ CmpInst::Predicate Pred;
+ if (!match(I.getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B))))
+ break;
if (GuaranteedToExecute) {
// The assume is guaranteed to execute when BB is entered, hence Cond
// holds on entry to BB.
@@ -1035,7 +1036,23 @@ void State::addInfoFor(BasicBlock &BB) {
WorkList.emplace_back(
FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
}
+ break;
+ }
+ // Enqueue ssub_with_overflow for simplification.
+ case Intrinsic::ssub_with_overflow:
+ WorkList.push_back(
+ FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
+ break;
+ // Enqueue the intrinsics to add extra info.
+ case Intrinsic::abs:
+ case Intrinsic::umin:
+ case Intrinsic::umax:
+ case Intrinsic::smin:
+ case Intrinsic::smax:
+ WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I));
+ break;
}
+
GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
}
@@ -1693,6 +1710,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
ICmpInst::Predicate Pred;
if (!CB.isConditionFact()) {
+ Value *X;
+ if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
+ // TODO: Add CB.Inst >= 0 fact.
+ AddFact(CmpInst::ICMP_SGE, CB.Inst, X);
+ continue;
+ }
+
if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
AddFact(Pred, MinMax, MinMax->getLHS());
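
Decomposition::sub, added in the ConstraintElimination.cpp hunk, negates the subtrahend and folds it into the minuend so nuw subtractions decompose recursively. A toy analogue with standard containers (the member names mirror the patch, but this is not the LLVM class):

#include <cassert>
#include <utility>
#include <vector>

// Toy stand-in for ConstraintElimination's Decomposition: a constant offset
// plus a list of (coefficient, variable-id) terms.
struct Decomp {
  long Offset = 0;
  std::vector<std::pair<long, int>> Vars; // (coeff, var id)
  void mul(long F) {
    Offset *= F;
    for (auto &V : Vars)
      V.first *= F;
  }
  void sub(const Decomp &Other) { // mirrors the new Decomposition::sub
    Decomp Tmp = Other;
    Tmp.mul(-1);
    Offset += Tmp.Offset;
    Vars.insert(Vars.end(), Tmp.Vars.begin(), Tmp.Vars.end());
  }
};

int main() {
  Decomp A{5, {{1, /*x*/ 0}}}; // x + 5
  Decomp B{2, {{1, /*y*/ 1}}}; // y + 2
  A.sub(B);                    // (x + 5) - (y + 2) == x - y + 3
  assert(A.Offset == 3 && A.Vars.size() == 2 && A.Vars[1].first == -1);
}
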
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index fb4d82885377..282c44563466 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -29,9 +29,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -66,19 +67,6 @@ class CanonicalizeFreezeInLoopsImpl {
ScalarEvolution &SE;
DominatorTree &DT;
- struct FrozenIndPHIInfo {
- // A freeze instruction that uses an induction phi
- FreezeInst *FI = nullptr;
- // The induction phi, step instruction, the operand idx of StepInst which is
- // a step value
- PHINode *PHI;
- BinaryOperator *StepInst;
- unsigned StepValIdx = 0;
-
- FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst)
- : PHI(PHI), StepInst(StepInst) {}
- };
-
// Can freeze instruction be pushed into operands of I?
// In order to do this, I should not create a poison after I's flags are
// stripped.
@@ -99,6 +87,46 @@ public:
} // anonymous namespace
+namespace llvm {
+
+struct FrozenIndPHIInfo {
+ // A freeze instruction that uses an induction phi
+ FreezeInst *FI = nullptr;
+ // The induction phi, step instruction, the operand idx of StepInst which is
+ // a step value
+ PHINode *PHI;
+ BinaryOperator *StepInst;
+ unsigned StepValIdx = 0;
+
+ FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst)
+ : PHI(PHI), StepInst(StepInst) {}
+
+ bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; }
+};
+
+template <> struct DenseMapInfo<FrozenIndPHIInfo> {
+ static inline FrozenIndPHIInfo getEmptyKey() {
+ return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(),
+ DenseMapInfo<BinaryOperator *>::getEmptyKey());
+ }
+
+ static inline FrozenIndPHIInfo getTombstoneKey() {
+ return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getTombstoneKey(),
+ DenseMapInfo<BinaryOperator *>::getTombstoneKey());
+ }
+
+ static unsigned getHashValue(const FrozenIndPHIInfo &Val) {
+ return DenseMapInfo<FreezeInst *>::getHashValue(Val.FI);
+ };
+
+ static bool isEqual(const FrozenIndPHIInfo &LHS,
+ const FrozenIndPHIInfo &RHS) {
+ return LHS.FI == RHS.FI;
+ };
+};
+
+} // end namespace llvm
+
// Given U = (value, user), replace value with freeze(value), and let
// SCEV forget user. The inserted freeze is placed in the preheader.
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
@@ -126,7 +154,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() {
if (!L->isLoopSimplifyForm())
return false;
- SmallVector<FrozenIndPHIInfo, 4> Candidates;
+ SmallSetVector<FrozenIndPHIInfo, 4> Candidates;
for (auto &PHI : L->getHeader()->phis()) {
InductionDescriptor ID;
@@ -155,7 +183,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() {
if (auto *FI = dyn_cast<FreezeInst>(U)) {
LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n");
Info.FI = FI;
- Candidates.push_back(Info);
+ Candidates.insert(Info);
}
};
for_each(PHI.users(), Visit);
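
The CanonicalizeFreezeInLoops.cpp change keys FrozenIndPHIInfo on its freeze instruction (via the DenseMapInfo specialization) and collects candidates in a SmallSetVector, so the same freeze is queued only once. A rough analogue with standard containers (hypothetical types; note an unordered_set drops the insertion order a SetVector preserves, so this only illustrates the dedup):

#include <cassert>
#include <functional>
#include <unordered_set>

struct Info {
  const void *FI; // stand-in for the FreezeInst* that identifies a candidate
  int PHI, StepInst;
};
struct KeyOnFI {
  size_t operator()(const Info &I) const {
    return std::hash<const void *>()(I.FI);
  }
  bool operator()(const Info &A, const Info &B) const { return A.FI == B.FI; }
};

int main() {
  std::unordered_set<Info, KeyOnFI, KeyOnFI> Candidates;
  int Freeze;                           // pretend this is a FreezeInst
  Candidates.insert({&Freeze, 1, 2});
  Candidates.insert({&Freeze, 1, 2});   // duplicate visit: ignored
  assert(Candidates.size() == 1);
}
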
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index a758fb306982..c76cc9db16d7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -3593,8 +3593,9 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
if (isa<ConstantInt>(C))
return createIntegerExpression(C);
- if (Ty.isFloatTy() || Ty.isDoubleTy()) {
- const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF();
+ auto *FP = dyn_cast<ConstantFP>(&C);
+ if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) {
+ const APFloat &APF = FP->getValueAPF();
return DIB.createConstantValueExpression(
APF.bitcastToAPInt().getZExtValue());
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f82e161fb846..8e135d80f4f2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8174,13 +8174,20 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
bool Consecutive =
Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+ VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
+ if (Consecutive) {
+ auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+ Reverse, I->getDebugLoc());
+ Builder.getInsertBlock()->appendRecipe(VectorPtr);
+ Ptr = VectorPtr;
+ }
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
- Consecutive, Reverse);
+ return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
+ Reverse);
StoreInst *Store = cast<StoreInst>(I);
- return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
- Mask, Consecutive, Reverse);
+ return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask,
+ Consecutive, Reverse);
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -9475,8 +9482,8 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();
if (isMaskRequired) {
- // Mask reversal is only neede for non-all-one (null) masks, as reverse of a
- // null all-one mask is a null mask.
+ // Mask reversal is only needed for non-all-one (null) masks, as reverse of
+ // a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
if (isReverse())
@@ -9485,44 +9492,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
}
}
- const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
- // Calculate the pointer for the specific unroll-part.
- Value *PartPtr = nullptr;
-
- // Use i32 for the gep index type when the value is constant,
- // or query DataLayout for a more suitable index type otherwise.
- const DataLayout &DL =
- Builder.GetInsertBlock()->getModule()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
- ? DL.getIndexType(PointerType::getUnqual(
- ScalarDataTy->getContext()))
- : Builder.getInt32Ty();
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = gep->isInBounds();
- if (isReverse()) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -Part * RunTimeVF
- Value *NumElt =
- Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
- PartPtr =
- Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
- PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
- }
-
- return PartPtr;
- };
-
// Handle Stores:
if (SI) {
State.setDebugLocFrom(SI->getDebugLoc());
@@ -9543,8 +9512,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
- auto *VecPtr =
- CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ auto *VecPtr = State.get(getAddr(), Part);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
@@ -9568,8 +9536,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
- auto *VecPtr =
- CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ auto *VecPtr = State.get(getAddr(), Part);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32913b3f5569..304991526064 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses(
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
Instruction *UserInst = dyn_cast<Instruction>(U);
- if (!UserInst)
+ if (!UserInst || isDeleted(UserInst))
continue;
- if (isDeleted(UserInst))
+ // Ignore users in the user ignore list.
+ if (UserIgnoreList && UserIgnoreList->contains(UserInst))
continue;
// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {
- Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized
- // instructions. If that is the case, the one in Lane 0 will
+ // instructions. If that is the case, the one in FoundLane will
// be used.
- if (UseScalar != U ||
- UseEntry->State == TreeEntry::ScatterVectorize ||
+ if (UseEntry->State == TreeEntry::ScatterVectorize ||
UseEntry->State == TreeEntry::PossibleStridedVectorize ||
- !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+ !doesInTreeUserNeedToExtract(
+ Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
continue;
}
+ U = nullptr;
}
- // Ignore users in the user ignore list.
- if (UserIgnoreList && UserIgnoreList->contains(UserInst))
- continue;
-
- LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
- << Lane << " from " << *Scalar << ".\n");
- ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
+ LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
+ << " from lane " << Lane << " from " << *Scalar
+ << ".\n");
+ ExternalUses.emplace_back(Scalar, U, FoundLane);
}
}
}
@@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
(void)E;
return TTI->getInstructionCost(VI, CostKind);
};
+ // FIXME: Workaround for syntax error reported by MSVC buildbots.
+ TargetTransformInfo &TTIRef = *TTI;
// Need to clear CommonCost since the final shuffle cost is included into
// vector cost.
auto GetVectorCost = [&](InstructionCost) {
@@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// No need to add new vector costs here since we're going to reuse
// same main/alternate vector ops, just do different shuffling.
} else if (Instruction::isBinaryOp(E->getOpcode())) {
- VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+ VecCost =
+ TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost +=
- TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
+ TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
- VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
- CI0->getPredicate(), CostKind, VL0);
- VecCost += TTI->getCmpSelInstrCost(
+ VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTIRef.getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
@@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
- VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
- TTI::CastContextHint::None, CostKind);
- VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
- TTI::CastContextHint::None, CostKind);
+ VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
+ TTI::CastContextHint::None, CostKind);
+ VecCost +=
+ TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
+ TTI::CastContextHint::None, CostKind);
}
SmallVector<int> Mask;
E->buildAltOpShuffleMask(
@@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return I->getOpcode() == E->getAltOpcode();
},
Mask);
- VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- FinalVecTy, Mask);
+ VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
+ // Patterns like [fadd,fsub] can be combined into a single instruction
+ // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
+ // need to take into account their order when looking for the most used
+ // order.
+ unsigned Opcode0 = E->getOpcode();
+ unsigned Opcode1 = E->getAltOpcode();
+ // The opcode mask selects between the two opcodes.
+ SmallBitVector OpcodeMask(E->Scalars.size(), false);
+ for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
+ if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
+ OpcodeMask.set(Lane);
+ // If this pattern is supported by the target then we consider the
+ // order.
+ if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
+ InstructionCost AltVecCost = TTIRef.getAltInstrCost(
+ VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+ return AltVecCost < VecCost ? AltVecCost : VecCost;
+ }
+ // TODO: Check the reverse order too.
return VecCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
-
- // The pointer operand uses an in-tree scalar so we add the new
- // LoadInst to ExternalUses list to make sure that an extract will
- // be generated in the future.
- if (isa<Instruction>(PO)) {
- if (TreeEntry *Entry = getTreeEntry(PO)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(PO);
- ExternalUses.emplace_back(PO, NewLI, FoundLane);
- }
- }
} else {
assert((E->State == TreeEntry::ScatterVectorize ||
E->State == TreeEntry::PossibleStridedVectorize) &&
@@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
StoreInst *ST =
Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
- // The pointer operand uses an in-tree scalar, so add the new StoreInst to
- // ExternalUses to make sure that an extract will be generated in the
- // future.
- if (isa<Instruction>(Ptr)) {
- if (TreeEntry *Entry = getTreeEntry(Ptr)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(Ptr);
- ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane));
- }
- }
-
Value *V = propagateMetadata(ST, E->Scalars);
E->VectorizedValue = V;
@@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E);
- Intrinsic::ID IID = Intrinsic::not_intrinsic;
- if (Function *FI = CI->getCalledFunction())
- IID = FI->getIntrinsicID();
-
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
@@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
SmallVector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
TysForDecl.push_back(
FixedVectorType::get(CI->getType(), E->Scalars.size()));
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) {
+ if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(I);
OpVecs.push_back(CEI->getArgOperand(I));
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
@@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I))
+ if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(OpVec->getType());
}
@@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CI->getOperandBundlesAsDefs(OpBundles);
Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
- // The scalar argument uses an in-tree scalar so we add the new vectorized
- // call to ExternalUses list to make sure that an extract will be
- // generated in the future.
- if (isa_and_present<Instruction>(ScalarArg)) {
- if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
- // Find which lane we need to extract.
- unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
- ExternalUses.push_back(
- ExternalUser(ScalarArg, cast<User>(V), FoundLane));
- }
- }
-
propagateIRFlags(V, E->Scalars, VL0);
V = FinalShuffle(V, E, VecTy, IsSigned);
@@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree(
DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
SmallDenseSet<Value *, 4> UsedInserts;
DenseMap<Value *, Value *> VectorCasts;
+ SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree(
VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
- // If User == nullptr, the Scalar is used as extra arg. Generate
- // ExtractElement instruction and update the record for this scalar in
- // ExternallyUsedValues.
+ // If User == nullptr, the Scalar remains as scalar in vectorized
+ // instructions or is used as extra arg. Generate ExtractElement instruction
+ // and update the record for this scalar in ExternallyUsedValues.
if (!User) {
- assert(ExternallyUsedValues.count(Scalar) &&
- "Scalar with nullptr as an external user must be registered in "
- "ExternallyUsedValues map");
+ if (!ScalarsWithNullptrUser.insert(Scalar).second)
+ continue;
+ assert((ExternallyUsedValues.count(Scalar) ||
+ any_of(Scalar->users(),
+ [&](llvm::User *U) {
+ TreeEntry *UseEntry = getTreeEntry(U);
+ return UseEntry &&
+ UseEntry->State == TreeEntry::Vectorize &&
+ E->State == TreeEntry::Vectorize &&
+ doesInTreeUserNeedToExtract(
+ Scalar,
+ cast<Instruction>(UseEntry->Scalars.front()),
+ TLI);
+ })) &&
+ "Scalar with nullptr User must be registered in "
+ "ExternallyUsedValues map or remain as scalar in vectorized "
+ "instructions");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (auto *PHI = dyn_cast<PHINode>(VecI))
Builder.SetInsertPoint(PHI->getParent(),
@@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
for (auto *V : Candidates) {
auto *GEP = cast<GetElementPtrInst>(V);
auto *GEPIdx = GEP->idx_begin()->get();
- assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
+ assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx));
Bundle[BundleIndex++] = GEPIdx;
}
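
The SLPVectorizer.cpp cost hunk builds a per-lane mask marking which scalars use the alternate opcode before asking the target about [fadd,fsub]-style mixed patterns via isLegalAltInstr/getAltInstrCost. A minimal sketch of that mask construction with plain containers (hypothetical opcodes, not LLVM's SmallBitVector):

#include <cassert>
#include <vector>

int main() {
  // Lanes alternate between a main opcode (fadd) and an alt opcode (fsub),
  // e.g. the [fadd, fsub, fadd, fsub] pattern mentioned in the comment above.
  enum Op { FAdd, FSub };
  std::vector<Op> Scalars = {FAdd, FSub, FAdd, FSub};
  const Op Opcode1 = FSub; // alternate opcode

  std::vector<bool> OpcodeMask(Scalars.size(), false);
  for (unsigned Lane = 0; Lane < Scalars.size(); ++Lane)
    if (Scalars[Lane] == Opcode1)
      OpcodeMask[Lane] = true; // this lane uses the alternate opcode

  assert(OpcodeMask == std::vector<bool>({false, true, false, true}));
}
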
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 94cb76889813..7d33baac52c9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1357,6 +1357,36 @@ public:
#endif
};
+/// A recipe to compute the pointers for widened memory accesses of IndexTy for
+/// all parts. If IsReverse is true, compute pointers for accessing the input in
+/// reverse order per part.
+class VPVectorPointerRecipe : public VPRecipeBase, public VPValue {
+ Type *IndexedTy;
+ bool IsReverse;
+
+public:
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
+ DebugLoc DL)
+ : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this),
+ IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02e400d590be..76961629aece 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -498,16 +498,17 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
"DbgInfoIntrinsic should have been dropped during VPlan construction");
State.setDebugLocFrom(CI.getDebugLoc());
+ bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
FunctionType *VFTy = nullptr;
if (Variant)
VFTy = Variant->getFunctionType();
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
- if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
TysForDecl.push_back(
VectorType::get(CI.getType()->getScalarType(), State.VF));
- }
SmallVector<Value *, 4> Args;
for (const auto &I : enumerate(operands())) {
// Some intrinsics have a scalar argument - don't replace it with a
@@ -516,18 +517,19 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
// e.g. linear parameters for pointers.
Value *Arg;
if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) ||
- (VectorIntrinsicID != Intrinsic::not_intrinsic &&
+ (UseIntrinsic &&
isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())))
Arg = State.get(I.value(), VPIteration(0, 0));
else
Arg = State.get(I.value(), Part);
- if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
TysForDecl.push_back(Arg->getType());
Args.push_back(Arg);
}
Function *VectorF;
- if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
+ if (UseIntrinsic) {
// Use vector version of the intrinsic.
Module *M = State.Builder.GetInsertBlock()->getModule();
VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
@@ -1209,6 +1211,59 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPVectorPointerRecipe ::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = nullptr;
+ // Use i32 for the gep index type when the value is constant,
+ // or query DataLayout for a more suitable index type otherwise.
+ const DataLayout &DL =
+ Builder.GetInsertBlock()->getModule()->getDataLayout();
+ Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
+ ? DL.getIndexType(IndexedTy->getPointerTo())
+ : Builder.getInt32Ty();
+ Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+ bool InBounds = false;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+ InBounds = GEP->isInBounds();
+ if (IsReverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+ // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+ Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
+ // NumElt = -Part * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane =
+ Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+ PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
+ } else {
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
+ PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
+ }
+
+ State.set(this, PartPtr, Part);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = vector-pointer ";
+ if (IsReverse)
+ O << "(reverse) ";
+
+ printOperands(O, SlotTracker);
+}
+#endif
+
void VPBlendRecipe::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
// We know that all PHIs in non-header blocks are converted into
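
VPVectorPointerRecipe::execute above steps each unroll part forward by Part * VF elements or, for reverse accesses, back by NumElt = -Part * RunTimeVF plus LastLane = 1 - RunTimeVF. A small model of the element index each part starts at (fixed-width VF, so VScale = 1; illustrative only):

#include <cassert>
#include <cstdint>

// For a reverse, consecutive access, the wide load/store of part `Part` must
// start at the lowest-addressed element it covers: base + (-Part*VF) + (1-VF).
int64_t reverseStartIndex(int64_t Part, int64_t VF) {
  int64_t NumElt = -Part * VF; // step back over whole parts
  int64_t LastLane = 1 - VF;   // then to the lowest-addressed lane
  return NumElt + LastLane;
}

int main() {
  const int64_t VF = 4;
  // Part 0 covers elements [-3, 0], part 1 covers [-7, -4], and so on.
  assert(reverseStartIndex(0, VF) == -3);
  assert(reverseStartIndex(1, VF) == -7);
  int64_t Forward0 = 0 * VF, Forward1 = 1 * VF; // non-reverse: Part * VF
  assert(Forward0 == 0 && Forward1 == 4);
}
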
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 116acad8e8f3..8cc98f4abf93 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -351,6 +351,7 @@ public:
VPReductionSC,
VPReplicateSC,
VPScalarIVStepsSC,
+ VPVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
diff --git a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
index fcb6392a1d95..299b7856ec0b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -1287,8 +1287,7 @@ static const char *matchFlagWithArg(StringRef Expected,
ArrayRef<const char *> Args) {
StringRef Arg = *ArgIt;
- if (Arg.starts_with("--"))
- Arg = Arg.substr(2);
+ Arg.consume_front("--");
size_t len = Expected.size();
if (Arg == Expected) {
diff --git a/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp b/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp
index 6fe18a51c9f5..3e77b1ed89b0 100644
--- a/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-diff/llvm-diff.cpp
@@ -42,8 +42,7 @@ static std::unique_ptr<Module> readModule(LLVMContext &Context,
static void diffGlobal(DifferenceEngine &Engine, Module &L, Module &R,
StringRef Name) {
// Drop leading sigils from the global name.
- if (Name.starts_with("@"))
- Name = Name.substr(1);
+ Name.consume_front("@");
Function *LFn = L.getFunction(Name);
Function *RFn = R.getFunction(Name);
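
Both tool changes above replace a starts_with/substr pair with StringRef::consume_front, which strips the prefix only when present and reports whether it did. A string_view analogue (a sketch, not the LLVM API):

#include <cassert>
#include <string_view>

// Drop Prefix from S if present; return whether anything was removed.
static bool consumeFront(std::string_view &S, std::string_view Prefix) {
  if (S.substr(0, Prefix.size()) != Prefix)
    return false;
  S.remove_prefix(Prefix.size());
  return true;
}

int main() {
  std::string_view Arg = "--members";
  consumeFront(Arg, "--");
  assert(Arg == "members");
  std::string_view Name = "main"; // no leading sigil: left untouched
  assert(!consumeFront(Name, "@"));
  assert(Name == "main");
}
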
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 06e7ec3b9230..9ee1472bdf5c 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -563,6 +563,13 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_EVEX_L2_W_XD_KZ_B:
case IC_EVEX_L2_W_OPSIZE_KZ_B:
return false;
+ case IC_EVEX_NF:
+ case IC_EVEX_B_NF:
+ case IC_EVEX_OPSIZE_NF:
+ case IC_EVEX_OPSIZE_B_NF:
+ case IC_EVEX_W_NF:
+ case IC_EVEX_W_B_NF:
+ return false;
default:
errs() << "Unknown instruction class: "
<< stringForContext((InstructionContext)parent) << "\n";
@@ -889,7 +896,19 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if ((index & ATTR_EVEX) && (index & ATTR_OPSIZE) && (index & ATTR_ADSIZE))
o << "IC_EVEX_OPSIZE_ADSIZE";
- else if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
+ else if (index & ATTR_EVEXNF) {
+ o << "IC_EVEX";
+ if (index & ATTR_REXW)
+ o << "_W";
+ else if (index & ATTR_OPSIZE)
+ o << "_OPSIZE";
+
+ if (index & ATTR_EVEXB)
+ o << "_B";
+
+ o << "_NF";
+ } else if ((index & ATTR_EVEX) || (index & ATTR_VEX) ||
+ (index & ATTR_VEXL)) {
if (index & ATTR_EVEX)
o << "IC_EVEX";
else
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index d3299e281031..101b75e2f087 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -348,7 +348,9 @@ public:
// memory form: broadcast
if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
return false;
- if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B))
+ // EVEX_B indicates NDD for MAP4 instructions
+ if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&
+ RegRI.OpMap != X86Local::T_MAP4)
return false;
if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
@@ -369,7 +371,8 @@ public:
RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W,
RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L,
RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ,
- RegRI.HasEVEX_L2, RegRec->getValueAsBit("hasEVEX_RC"),
+ RegRI.HasEVEX_L2, RegRI.HasEVEX_NF,
+ RegRec->getValueAsBit("hasEVEX_RC"),
RegRec->getValueAsBit("hasLockPrefix"),
RegRec->getValueAsBit("hasNoTrackPrefix"),
RegRec->getValueAsBit("EVEX_W1_VEX_W0")) !=
@@ -377,7 +380,8 @@ public:
MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W,
MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L,
MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ,
- MemRI.HasEVEX_L2, MemRec->getValueAsBit("hasEVEX_RC"),
+ MemRI.HasEVEX_L2, MemRI.HasEVEX_NF,
+ MemRec->getValueAsBit("hasEVEX_RC"),
MemRec->getValueAsBit("hasLockPrefix"),
MemRec->getValueAsBit("hasNoTrackPrefix"),
MemRec->getValueAsBit("EVEX_W1_VEX_W0")))
@@ -668,6 +672,14 @@ void X86FoldTablesEmitter::run(raw_ostream &O) {
if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
continue;
+ // Promoted legacy instruction is in EVEX space, and has REX2-encoding
+ // alternative. It's added due to HW design and never emitted by compiler.
+ if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==
+ X86Local::T_MAP4 &&
+ byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==
+ X86Local::ExplicitEVEX)
+ continue;
+
// - Instructions including RST register class operands are not relevant
// for memory folding (for further details check the explanation in
// lib/Target/X86/X86InstrFPStack.td file).
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
index 47ee9544f323..fb430676c504 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -125,6 +125,7 @@ RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) {
HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z");
HasEVEX_B = Rec->getValueAsBit("hasEVEX_B");
+ HasEVEX_NF = Rec->getValueAsBit("hasEVEX_NF");
IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
IsAsmParserOnly = Rec->getValueAsBit("isAsmParserOnly");
ForceDisassemble = Rec->getValueAsBit("ForceDisassemble");
@@ -185,6 +186,9 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
: (HasEVEX_KZ ? n##_KZ \
: (HasEVEX_K ? n##_K : (HasEVEX_B ? n##_B : n)))))
+#define EVEX_NF(n) (HasEVEX_NF ? n##_NF : n)
+#define EVEX_B_NF(n) (HasEVEX_B ? EVEX_NF(n##_B) : EVEX_NF(n))
+
InstructionContext RecognizableInstr::insnContext() const {
InstructionContext insnContext;
@@ -193,8 +197,15 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n";
llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
}
- // VEX_L & VEX_W
- if (!EncodeRC && HasVEX_L && HasREX_W) {
+ if (HasEVEX_NF) {
+ if (OpPrefix == X86Local::PD)
+ insnContext = EVEX_B_NF(IC_EVEX_OPSIZE);
+ else if (HasREX_W)
+ insnContext = EVEX_B_NF(IC_EVEX_W);
+ else
+ insnContext = EVEX_B_NF(IC_EVEX);
+ } else if (!EncodeRC && HasVEX_L && HasREX_W) {
+ // VEX_L & VEX_W
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE);
else if (OpPrefix == X86Local::XS)
@@ -486,6 +497,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
++additionalOperands;
#endif
+ bool IsND = OpMap == X86Local::T_MAP4 && HasEVEX_B && HasVEX_4V;
switch (Form) {
default:
llvm_unreachable("Unhandled form");
@@ -536,11 +548,14 @@ void RecognizableInstr::emitInstructionSpecifier() {
numPhysicalOperands <= 3 + additionalOperands &&
"Unexpected number of operands for MRMDestReg");
+ if (IsND)
+ HANDLE_OPERAND(vvvvRegister)
+
HANDLE_OPERAND(rmRegister)
if (HasEVEX_K)
HANDLE_OPERAND(writemaskRegister)
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
@@ -570,12 +585,15 @@ void RecognizableInstr::emitInstructionSpecifier() {
numPhysicalOperands <= 3 + additionalOperands &&
"Unexpected number of operands for MRMDestMemFrm with VEX_4V");
+ if (IsND)
+ HANDLE_OPERAND(vvvvRegister)
+
HANDLE_OPERAND(memory)
if (HasEVEX_K)
HANDLE_OPERAND(writemaskRegister)
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
@@ -594,12 +612,15 @@ void RecognizableInstr::emitInstructionSpecifier() {
numPhysicalOperands <= 4 + additionalOperands &&
"Unexpected number of operands for MRMSrcRegFrm");
+ if (IsND)
+ HANDLE_OPERAND(vvvvRegister)
+
HANDLE_OPERAND(roRegister)
if (HasEVEX_K)
HANDLE_OPERAND(writemaskRegister)
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
@@ -641,13 +662,15 @@ void RecognizableInstr::emitInstructionSpecifier() {
assert(numPhysicalOperands >= 2 + additionalOperands &&
numPhysicalOperands <= 4 + additionalOperands &&
"Unexpected number of operands for MRMSrcMemFrm");
+ if (IsND)
+ HANDLE_OPERAND(vvvvRegister)
HANDLE_OPERAND(roRegister)
if (HasEVEX_K)
HANDLE_OPERAND(writemaskRegister)
- if (HasVEX_4V)
+ if (!IsND && HasVEX_4V)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
@@ -1216,6 +1239,8 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
OperandEncoding
RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
uint8_t OpSize) {
+ ENCODING("GR8", ENCODING_VVVV)
+ ENCODING("GR16", ENCODING_VVVV)
ENCODING("GR32", ENCODING_VVVV)
ENCODING("GR64", ENCODING_VVVV)
ENCODING("FR32", ENCODING_VVVV)
diff --git a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h
index 61ad5e32b3fb..007c700cdfaf 100644
--- a/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/contrib/llvm-project/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -172,7 +172,7 @@ enum { PD = 1, XS = 2, XD = 3, PS = 4 };
enum { VEX = 1, XOP = 2, EVEX = 3 };
enum { OpSize16 = 1, OpSize32 = 2 };
enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 };
-enum { ExplicitREX2 = 1 };
+enum { ExplicitREX2 = 1, ExplicitEVEX = 3 };
} // namespace X86Local
namespace X86Disassembler {
@@ -212,6 +212,8 @@ struct RecognizableInstrBase {
bool HasEVEX_KZ;
/// The hasEVEX_B field from the record
bool HasEVEX_B;
+ /// The hasEVEX_NF field from the record
+ bool HasEVEX_NF;
/// Indicates that the instruction uses the L and L' fields for RC.
bool EncodeRC;
/// The isCodeGenOnly field from the record