Diffstat (limited to 'test/CodeGen/X86/atomic_mi.ll')
 -rw-r--r--   test/CodeGen/X86/atomic_mi.ll   998
 1 file changed, 0 insertions(+), 998 deletions(-)
diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll
deleted file mode 100644
index e9f1b59ac589..000000000000
--- a/test/CodeGen/X86/atomic_mi.ll
+++ /dev/null
@@ -1,998 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
-
-; This file checks that atomic (non-seq_cst) stores of immediate values are
-; done in a single mov instruction, not two. More precisely, it makes sure the
-; immediate is not first needlessly copied into a register.
-
-; Similarly, it checks that a binary operation of an immediate with an atomic
-; variable that is stored back in that variable is done as a single instruction.
-; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
-; should be just an add instruction, instead of loading x into a register, doing
-; an add and storing the result back.
-; The binary operations currently supported are add, and, or, and xor.
-; sub is not supported because it is translated into an addition of the
-; negated immediate.
-;
-; We also check the same patterns:
-; - For inc/dec.
-; - For register instead of immediate operands.
-; - For floating point operations.
-
-; seq_cst stores are left as (lock) xchgl, but we try to check every other
-; attribute at least once.
-
-; Please note that these operations do not require the lock prefix: only
-; sequentially consistent stores require this kind of protection on X86.
-; And even for seq_cst operations, LLVM uses the xchg instruction, which has
-; an implicit lock prefix, so making it explicit is not required.
-
-define void @store_atomic_imm_8(i8* %p) {
-; X64-LABEL: store_atomic_imm_8:
-; X64: movb
-; X64-NOT: movb
-; X32-LABEL: store_atomic_imm_8:
-; X32: movb
-; X32-NOT: movb
-  store atomic i8 42, i8* %p release, align 1
-  ret void
-}
-
-define void @store_atomic_imm_16(i16* %p) {
-; X64-LABEL: store_atomic_imm_16:
-; X64: movw
-; X64-NOT: movw
-; X32-LABEL: store_atomic_imm_16:
-; X32: movw
-; X32-NOT: movw
-  store atomic i16 42, i16* %p monotonic, align 2
-  ret void
-}
-
-define void @store_atomic_imm_32(i32* %p) {
-; X64-LABEL: store_atomic_imm_32:
-; X64: movl
-; X64-NOT: movl
-; On 32-bit, there is an extra movl for each of these functions
-; (probably for alignment reasons).
-; X32-LABEL: store_atomic_imm_32:
-; X32: movl 4(%esp), %eax
-; X32: movl
-; X32-NOT: movl
-  store atomic i32 42, i32* %p release, align 4
-  ret void
-}
-
-define void @store_atomic_imm_64(i64* %p) {
-; X64-LABEL: store_atomic_imm_64:
-; X64: movq
-; X64-NOT: movq
-; These are implemented with a CAS loop on 32-bit architectures, and thus
-; cannot be optimized in the same way as the others.
-; X32-LABEL: store_atomic_imm_64:
-; X32: cmpxchg8b
-  store atomic i64 42, i64* %p release, align 8
-  ret void
-}
-
-; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
-; even on X64, one must use movabsq, which can only target a register.
-define void @store_atomic_imm_64_big(i64* %p) {
-; X64-LABEL: store_atomic_imm_64_big:
-; X64: movabsq
-; X64: movq
-  store atomic i64 100000000000, i64* %p monotonic, align 8
-  ret void
-}
-
-; It would be incorrect to replace a lock xchgl with a movl.
-define void @store_atomic_imm_32_seq_cst(i32* %p) {
-; X64-LABEL: store_atomic_imm_32_seq_cst:
-; X64: xchgl
-; X32-LABEL: store_atomic_imm_32_seq_cst:
-; X32: xchgl
-  store atomic i32 42, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- ADD -----
-
-define void @add_8i(i8* %p) {
-; X64-LABEL: add_8i:
-; X64-NOT: lock
-; X64: addb
-; X64-NOT: movb
-; X32-LABEL: add_8i:
-; X32-NOT: lock
-; X32: addb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p seq_cst, align 1
-  %2 = add i8 %1, 2
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @add_8r(i8* %p, i8 %v) {
-; X64-LABEL: add_8r:
-; X64-NOT: lock
-; X64: addb
-; X64-NOT: movb
-; X32-LABEL: add_8r:
-; X32-NOT: lock
-; X32: addb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p seq_cst, align 1
-  %2 = add i8 %1, %v
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @add_16i(i16* %p) {
-; Currently the transformation is not done on 16-bit accesses, as the backend
-; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: add_16i:
-; X64-NOT: addw
-; X32-LABEL: add_16i:
-; X32-NOT: addw
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = add i16 %1, 2
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @add_16r(i16* %p, i16 %v) {
-; Currently the transformation is not done on 16-bit accesses, as the backend
-; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: add_16r:
-; X64-NOT: addw
-; X32-LABEL: add_16r:
-; X32-NOT: addw [.*], (
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = add i16 %1, %v
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @add_32i(i32* %p) {
-; X64-LABEL: add_32i:
-; X64-NOT: lock
-; X64: addl
-; X64-NOT: movl
-; X32-LABEL: add_32i:
-; X32-NOT: lock
-; X32: addl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = add i32 %1, 2
-  store atomic i32 %2, i32* %p monotonic, align 4
-  ret void
-}
-
-define void @add_32r(i32* %p, i32 %v) {
-; X64-LABEL: add_32r:
-; X64-NOT: lock
-; X64: addl
-; X64-NOT: movl
-; X32-LABEL: add_32r:
-; X32-NOT: lock
-; X32: addl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = add i32 %1, %v
-  store atomic i32 %2, i32* %p monotonic, align 4
-  ret void
-}
-
-; The following is a corner case where the load is added to itself. The pattern
-; matching should not fold this. We only test with 32-bit add, but the same
-; applies to other sizes and operations.
-define void @add_32r_self(i32* %p) {
-; X64-LABEL: add_32r_self:
-; X64-NOT: lock
-; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
-; X64: addl %[[R]], %[[R]]
-; X64: movl %[[R]], (%[[M]])
-; X32-LABEL: add_32r_self:
-; X32-NOT: lock
-; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
-; X32: addl %[[R]], %[[R]]
-; X32: movl %[[R]], (%[[M]])
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = add i32 %1, %1
-  store atomic i32 %2, i32* %p monotonic, align 4
-  ret void
-}
-
-; The following is a corner case where the load's result is returned. The
-; optimizer isn't allowed to duplicate the load because it's atomic.
-define i32 @add_32r_ret_load(i32* %p, i32 %v) {
-; X64-LABEL: add_32r_ret_load:
-; X64-NOT: lock
-; X64: movl (%rdi), %eax
-; X64-NEXT: addl %eax, %esi
-; X64-NEXT: movl %esi, (%rdi)
-; X64-NEXT: retq
-; X32-LABEL: add_32r_ret_load:
-; X32-NOT: lock
-; X32: movl 4(%esp), %[[P:[a-z]+]]
-; X32-NEXT: movl (%[[P]]),
-; X32-NOT: %[[P]]
-; More code here; we just don't want it to load from P.
-; X32: movl %{{.*}}, (%[[P]])
-; X32-NEXT: retl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = add i32 %1, %v
-  store atomic i32 %2, i32* %p monotonic, align 4
-  ret i32 %1
-}
-
-define void @add_64i(i64* %p) {
-; X64-LABEL: add_64i:
-; X64-NOT: lock
-; X64: addq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'addq'.
-; X32-LABEL: add_64i:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = add i64 %1, 2
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @add_64r(i64* %p, i64 %v) {
-; X64-LABEL: add_64r:
-; X64-NOT: lock
-; X64: addq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'addq'.
-; X32-LABEL: add_64r:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = add i64 %1, %v
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @add_32i_seq_cst(i32* %p) {
-; X64-LABEL: add_32i_seq_cst:
-; X64: xchgl
-; X32-LABEL: add_32i_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = add i32 %1, 2
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-define void @add_32r_seq_cst(i32* %p, i32 %v) {
-; X64-LABEL: add_32r_seq_cst:
-; X64: xchgl
-; X32-LABEL: add_32r_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = add i32 %1, %v
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- AND -----
-
-define void @and_8i(i8* %p) {
-; X64-LABEL: and_8i:
-; X64-NOT: lock
-; X64: andb
-; X64-NOT: movb
-; X32-LABEL: and_8i:
-; X32-NOT: lock
-; X32: andb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p monotonic, align 1
-  %2 = and i8 %1, 2
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @and_8r(i8* %p, i8 %v) {
-; X64-LABEL: and_8r:
-; X64-NOT: lock
-; X64: andb
-; X64-NOT: movb
-; X32-LABEL: and_8r:
-; X32-NOT: lock
-; X32: andb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p monotonic, align 1
-  %2 = and i8 %1, %v
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @and_16i(i16* %p) {
-; Currently the transformation is not done on 16-bit accesses, as the backend
-; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: and_16i:
-; X64-NOT: andw
-; X32-LABEL: and_16i:
-; X32-NOT: andw
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = and i16 %1, 2
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @and_16r(i16* %p, i16 %v) {
-; Currently the transformation is not done on 16-bit accesses, as the backend
-; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: and_16r:
-; X64-NOT: andw
-; X32-LABEL: and_16r:
-; X32-NOT: andw [.*], (
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = and i16 %1, %v
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @and_32i(i32* %p) {
-; X64-LABEL: and_32i:
-; X64-NOT: lock
-; X64: andl
-; X64-NOT: movl
-; X32-LABEL: and_32i:
-; X32-NOT: lock
-; X32: andl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = and i32 %1, 2
-  store atomic i32 %2, i32* %p release, align 4
-  ret void
-}
-
-define void @and_32r(i32* %p, i32 %v) {
-; X64-LABEL: and_32r:
-; X64-NOT: lock
-; X64: andl
-; X64-NOT: movl
-; X32-LABEL: and_32r:
-; X32-NOT: lock
-; X32: andl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = and i32 %1, %v
-  store atomic i32 %2, i32* %p release, align 4
-  ret void
-}
-
-define void @and_64i(i64* %p) {
-; X64-LABEL: and_64i:
-; X64-NOT: lock
-; X64: andq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'andq'.
-; X32-LABEL: and_64i:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = and i64 %1, 2
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @and_64r(i64* %p, i64 %v) {
-; X64-LABEL: and_64r:
-; X64-NOT: lock
-; X64: andq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'andq'.
-; X32-LABEL: and_64r:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = and i64 %1, %v
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @and_32i_seq_cst(i32* %p) {
-; X64-LABEL: and_32i_seq_cst:
-; X64: xchgl
-; X32-LABEL: and_32i_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = and i32 %1, 2
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-define void @and_32r_seq_cst(i32* %p, i32 %v) {
-; X64-LABEL: and_32r_seq_cst:
-; X64: xchgl
-; X32-LABEL: and_32r_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = and i32 %1, %v
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- OR -----
-
-define void @or_8i(i8* %p) {
-; X64-LABEL: or_8i:
-; X64-NOT: lock
-; X64: orb
-; X64-NOT: movb
-; X32-LABEL: or_8i:
-; X32-NOT: lock
-; X32: orb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p acquire, align 1
-  %2 = or i8 %1, 2
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @or_8r(i8* %p, i8 %v) {
-; X64-LABEL: or_8r:
-; X64-NOT: lock
-; X64: orb
-; X64-NOT: movb
-; X32-LABEL: or_8r:
-; X32-NOT: lock
-; X32: orb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p acquire, align 1
-  %2 = or i8 %1, %v
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @or_16i(i16* %p) {
-; X64-LABEL: or_16i:
-; X64-NOT: orw
-; X32-LABEL: or_16i:
-; X32-NOT: orw
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = or i16 %1, 2
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @or_16r(i16* %p, i16 %v) {
-; X64-LABEL: or_16r:
-; X64-NOT: orw
-; X32-LABEL: or_16r:
-; X32-NOT: orw [.*], (
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = or i16 %1, %v
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @or_32i(i32* %p) {
-; X64-LABEL: or_32i:
-; X64-NOT: lock
-; X64: orl
-; X64-NOT: movl
-; X32-LABEL: or_32i:
-; X32-NOT: lock
-; X32: orl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = or i32 %1, 2
-  store atomic i32 %2, i32* %p release, align 4
-  ret void
-}
-
-define void @or_32r(i32* %p, i32 %v) {
-; X64-LABEL: or_32r:
-; X64-NOT: lock
-; X64: orl
-; X64-NOT: movl
-; X32-LABEL: or_32r:
-; X32-NOT: lock
-; X32: orl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = or i32 %1, %v
-  store atomic i32 %2, i32* %p release, align 4
-  ret void
-}
-
-define void @or_64i(i64* %p) {
-; X64-LABEL: or_64i:
-; X64-NOT: lock
-; X64: orq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'orq'.
-; X32-LABEL: or_64i:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = or i64 %1, 2
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @or_64r(i64* %p, i64 %v) {
-; X64-LABEL: or_64r:
-; X64-NOT: lock
-; X64: orq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'orq'.
-; X32-LABEL: or_64r:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = or i64 %1, %v
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @or_32i_seq_cst(i32* %p) {
-; X64-LABEL: or_32i_seq_cst:
-; X64: xchgl
-; X32-LABEL: or_32i_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = or i32 %1, 2
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-define void @or_32r_seq_cst(i32* %p, i32 %v) {
-; X64-LABEL: or_32r_seq_cst:
-; X64: xchgl
-; X32-LABEL: or_32r_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = or i32 %1, %v
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- XOR -----
-
-define void @xor_8i(i8* %p) {
-; X64-LABEL: xor_8i:
-; X64-NOT: lock
-; X64: xorb
-; X64-NOT: movb
-; X32-LABEL: xor_8i:
-; X32-NOT: lock
-; X32: xorb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p acquire, align 1
-  %2 = xor i8 %1, 2
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @xor_8r(i8* %p, i8 %v) {
-; X64-LABEL: xor_8r:
-; X64-NOT: lock
-; X64: xorb
-; X64-NOT: movb
-; X32-LABEL: xor_8r:
-; X32-NOT: lock
-; X32: xorb
-; X32-NOT: movb
-  %1 = load atomic i8, i8* %p acquire, align 1
-  %2 = xor i8 %1, %v
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @xor_16i(i16* %p) {
-; X64-LABEL: xor_16i:
-; X64-NOT: xorw
-; X32-LABEL: xor_16i:
-; X32-NOT: xorw
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = xor i16 %1, 2
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @xor_16r(i16* %p, i16 %v) {
-; X64-LABEL: xor_16r:
-; X64-NOT: xorw
-; X32-LABEL: xor_16r:
-; X32-NOT: xorw [.*], (
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = xor i16 %1, %v
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @xor_32i(i32* %p) {
-; X64-LABEL: xor_32i:
-; X64-NOT: lock
-; X64: xorl
-; X64-NOT: movl
-; X32-LABEL: xor_32i:
-; X32-NOT: lock
-; X32: xorl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = xor i32 %1, 2
-  store atomic i32 %2, i32* %p release, align 4
-  ret void
-}
-
-define void @xor_32r(i32* %p, i32 %v) {
-; X64-LABEL: xor_32r:
-; X64-NOT: lock
-; X64: xorl
-; X64-NOT: movl
-; X32-LABEL: xor_32r:
-; X32-NOT: lock
-; X32: xorl
-; X32-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = xor i32 %1, %v
-  store atomic i32 %2, i32* %p release, align 4
-  ret void
-}
-
-define void @xor_64i(i64* %p) {
-; X64-LABEL: xor_64i:
-; X64-NOT: lock
-; X64: xorq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'xorq'.
-; X32-LABEL: xor_64i:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = xor i64 %1, 2
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @xor_64r(i64* %p, i64 %v) {
-; X64-LABEL: xor_64r:
-; X64-NOT: lock
-; X64: xorq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'xorq'.
-; X32-LABEL: xor_64r:
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = xor i64 %1, %v
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @xor_32i_seq_cst(i32* %p) {
-; X64-LABEL: xor_32i_seq_cst:
-; X64: xchgl
-; X32-LABEL: xor_32i_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = xor i32 %1, 2
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-define void @xor_32r_seq_cst(i32* %p, i32 %v) {
-; X64-LABEL: xor_32r_seq_cst:
-; X64: xchgl
-; X32-LABEL: xor_32r_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = xor i32 %1, %v
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- INC -----
-
-define void @inc_8(i8* %p) {
-; X64-LABEL: inc_8:
-; X64-NOT: lock
-; X64: incb
-; X64-NOT: movb
-; X32-LABEL: inc_8:
-; X32-NOT: lock
-; X32: incb
-; X32-NOT: movb
-; SLOW_INC-LABEL: inc_8:
-; SLOW_INC-NOT: incb
-; SLOW_INC-NOT: movb
-  %1 = load atomic i8, i8* %p seq_cst, align 1
-  %2 = add i8 %1, 1
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @inc_16(i16* %p) {
-; Currently the transformation is not done on 16-bit accesses, as the backend
-; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: inc_16:
-; X64-NOT: incw
-; X32-LABEL: inc_16:
-; X32-NOT: incw
-; SLOW_INC-LABEL: inc_16:
-; SLOW_INC-NOT: incw
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = add i16 %1, 1
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @inc_32(i32* %p) {
-; X64-LABEL: inc_32:
-; X64-NOT: lock
-; X64: incl
-; X64-NOT: movl
-; X32-LABEL: inc_32:
-; X32-NOT: lock
-; X32: incl
-; X32-NOT: movl
-; SLOW_INC-LABEL: inc_32:
-; SLOW_INC-NOT: incl
-; SLOW_INC-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = add i32 %1, 1
-  store atomic i32 %2, i32* %p monotonic, align 4
-  ret void
-}
-
-define void @inc_64(i64* %p) {
-; X64-LABEL: inc_64:
-; X64-NOT: lock
-; X64: incq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'incq'.
-; X32-LABEL: inc_64:
-; SLOW_INC-LABEL: inc_64:
-; SLOW_INC-NOT: incq
-; SLOW_INC-NOT: movq
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = add i64 %1, 1
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @inc_32_seq_cst(i32* %p) {
-; X64-LABEL: inc_32_seq_cst:
-; X64: xchgl
-; X32-LABEL: inc_32_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = add i32 %1, 1
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- DEC -----
-
-define void @dec_8(i8* %p) {
-; X64-LABEL: dec_8:
-; X64-NOT: lock
-; X64: decb
-; X64-NOT: movb
-; X32-LABEL: dec_8:
-; X32-NOT: lock
-; X32: decb
-; X32-NOT: movb
-; SLOW_INC-LABEL: dec_8:
-; SLOW_INC-NOT: decb
-; SLOW_INC-NOT: movb
-  %1 = load atomic i8, i8* %p seq_cst, align 1
-  %2 = sub i8 %1, 1
-  store atomic i8 %2, i8* %p release, align 1
-  ret void
-}
-
-define void @dec_16(i16* %p) {
-; Currently the transformation is not done on 16-bit accesses, as the backend
-; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: dec_16:
-; X64-NOT: decw
-; X32-LABEL: dec_16:
-; X32-NOT: decw
-; SLOW_INC-LABEL: dec_16:
-; SLOW_INC-NOT: decw
-  %1 = load atomic i16, i16* %p acquire, align 2
-  %2 = sub i16 %1, 1
-  store atomic i16 %2, i16* %p release, align 2
-  ret void
-}
-
-define void @dec_32(i32* %p) {
-; X64-LABEL: dec_32:
-; X64-NOT: lock
-; X64: decl
-; X64-NOT: movl
-; X32-LABEL: dec_32:
-; X32-NOT: lock
-; X32: decl
-; X32-NOT: movl
-; SLOW_INC-LABEL: dec_32:
-; SLOW_INC-NOT: decl
-; SLOW_INC-NOT: movl
-  %1 = load atomic i32, i32* %p acquire, align 4
-  %2 = sub i32 %1, 1
-  store atomic i32 %2, i32* %p monotonic, align 4
-  ret void
-}
-
-define void @dec_64(i64* %p) {
-; X64-LABEL: dec_64:
-; X64-NOT: lock
-; X64: decq
-; X64-NOT: movq
-; We do not check X86-32 as it cannot do 'decq'.
-; X32-LABEL: dec_64:
-; SLOW_INC-LABEL: dec_64:
-; SLOW_INC-NOT: decq
-; SLOW_INC-NOT: movq
-  %1 = load atomic i64, i64* %p acquire, align 8
-  %2 = sub i64 %1, 1
-  store atomic i64 %2, i64* %p release, align 8
-  ret void
-}
-
-define void @dec_32_seq_cst(i32* %p) {
-; X64-LABEL: dec_32_seq_cst:
-; X64: xchgl
-; X32-LABEL: dec_32_seq_cst:
-; X32: xchgl
-  %1 = load atomic i32, i32* %p monotonic, align 4
-  %2 = sub i32 %1, 1
-  store atomic i32 %2, i32* %p seq_cst, align 4
-  ret void
-}
-
-; ----- FADD -----
-
-define void @fadd_32r(float* %loc, float %val) {
-; X64-LABEL: fadd_32r:
-; X64-NOT: lock
-; X64-NOT: mov
-; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: movss %[[XMM]], (%[[M]])
-; X32-LABEL: fadd_32r:
-; Don't check x86-32.
-; LLVM's SSE handling is conservative on x86-32 even without using atomics.
-  %floc = bitcast float* %loc to i32*
-  %1 = load atomic i32, i32* %floc seq_cst, align 4
-  %2 = bitcast i32 %1 to float
-  %add = fadd float %2, %val
-  %3 = bitcast float %add to i32
-  store atomic i32 %3, i32* %floc release, align 4
-  ret void
-}
-
-define void @fadd_64r(double* %loc, double %val) {
-; X64-LABEL: fadd_64r:
-; X64-NOT: lock
-; X64-NOT: mov
-; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: movsd %[[XMM]], (%[[M]])
-; X32-LABEL: fadd_64r:
-; Don't check x86-32 (see comment above).
-  %floc = bitcast double* %loc to i64*
-  %1 = load atomic i64, i64* %floc seq_cst, align 8
-  %2 = bitcast i64 %1 to double
-  %add = fadd double %2, %val
-  %3 = bitcast double %add to i64
-  store atomic i64 %3, i64* %floc release, align 8
-  ret void
-}
-
-@glob32 = global float 0.000000e+00, align 4
-@glob64 = global double 0.000000e+00, align 8
-
-; Floating-point add to a global using an immediate.
-define void @fadd_32g() {
-; X64-LABEL: fadd_32g:
-; X64-NOT: lock
-; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: addss glob32(%rip), %[[XMM]]
-; X64-NEXT: movss %[[XMM]], glob32(%rip)
-; X32-LABEL: fadd_32g:
-; Don't check x86-32 (see comment above).
-  %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
-  %f = bitcast i32 %i to float
-  %add = fadd float %f, 1.000000e+00
-  %s = bitcast float %add to i32
-  store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
-  ret void
-}
-
-define void @fadd_64g() {
-; X64-LABEL: fadd_64g:
-; X64-NOT: lock
-; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: addsd glob64(%rip), %[[XMM]]
-; X64-NEXT: movsd %[[XMM]], glob64(%rip)
-; X32-LABEL: fadd_64g:
-; Don't check x86-32 (see comment above).
-  %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
-  %f = bitcast i64 %i to double
-  %add = fadd double %f, 1.000000e+00
-  %s = bitcast double %add to i64
-  store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
-  ret void
-}
-
-; Floating-point add to a hard-coded immediate location using an immediate.
-define void @fadd_32imm() {
-; X64-LABEL: fadd_32imm:
-; X64-NOT: lock
-; X64: movl $3735928559, %e[[M:[a-z]+]]
-; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: addss (%r[[M]]), %[[XMM]]
-; X64-NEXT: movss %[[XMM]], (%r[[M]])
-; X32-LABEL: fadd_32imm:
-; Don't check x86-32 (see comment above).
-  %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
-  %f = bitcast i32 %i to float
-  %add = fadd float %f, 1.000000e+00
-  %s = bitcast float %add to i32
-  store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
-  ret void
-}
-
-define void @fadd_64imm() {
-; X64-LABEL: fadd_64imm:
-; X64-NOT: lock
-; X64: movl $3735928559, %e[[M:[a-z]+]]
-; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: addsd (%r[[M]]), %[[XMM]]
-; X64-NEXT: movsd %[[XMM]], (%r[[M]])
-; X32-LABEL: fadd_64imm:
-; Don't check x86-32 (see comment above).
-  %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
-  %f = bitcast i64 %i to double
-  %add = fadd double %f, 1.000000e+00
-  %s = bitcast double %add to i64
-  store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
-  ret void
-}
-
-; Floating-point add to a stack location.
-define void @fadd_32stack() {
-; X64-LABEL: fadd_32stack:
-; X64-NOT: lock
-; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: addss [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
-; X64-NEXT: movss %[[XMM]], [[STACKOFF]](%rsp)
-; X32-LABEL: fadd_32stack:
-; Don't check x86-32 (see comment above).
-  %ptr = alloca i32, align 4
-  %bc3 = bitcast i32* %ptr to float*
-  %load = load atomic i32, i32* %ptr acquire, align 4
-  %bc0 = bitcast i32 %load to float
-  %fadd = fadd float 1.000000e+00, %bc0
-  %bc1 = bitcast float %fadd to i32
-  store atomic i32 %bc1, i32* %ptr release, align 4
-  ret void
-}
-
-define void @fadd_64stack() {
-; X64-LABEL: fadd_64stack:
-; X64-NOT: lock
-; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: addsd [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
-; X64-NEXT: movsd %[[XMM]], [[STACKOFF]](%rsp)
-; X32-LABEL: fadd_64stack:
-; Don't check x86-32 (see comment above).
-  %ptr = alloca i64, align 8
-  %bc3 = bitcast i64* %ptr to double*
-  %load = load atomic i64, i64* %ptr acquire, align 8
-  %bc0 = bitcast i64 %load to double
-  %fadd = fadd double 1.000000e+00, %bc0
-  %bc1 = bitcast double %fadd to i64
-  store atomic i64 %bc1, i64* %ptr release, align 8
-  ret void
-}
-
-define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) {
-; X64-LABEL: fadd_array:
-; X64-NOT: lock
-; X64: addsd ([[ADDR:%r..,%r..,8]]), %[[XMM:xmm[0-9]+]]
-; X64-NEXT: movsd %[[XMM]], ([[ADDR]])
-; X32-LABEL: fadd_array:
-; Don't check x86-32 (see comment above).
-bb:
-  %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2
-  %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8
-  %tmp7 = bitcast i64 %tmp6 to double
-  %tmp8 = fadd double %tmp7, %arg1
-  %tmp9 = bitcast double %tmp8 to i64
-  store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8
-  ret void
-}
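
As a companion to the deleted test's intro comment, here is a minimal C++
sketch (not part of the test; the function names are illustrative only) of the
source-level patterns the file was exercising:

    #include <atomic>

    std::atomic<int> x;

    // A non-seq_cst store of an immediate should lower to a single
    // `movl $42, x(%rip)` on x86-64: no intermediate register copy
    // and no lock prefix.
    void store_imm() {
        x.store(42, std::memory_order_release);
    }

    // A load/op/store round trip with an immediate should fold into one
    // memory-destination instruction, e.g. `addl $2, x(%rip)`, instead
    // of a separate load, add, and store.
    void add_imm() {
        x.store(2 + x.load(std::memory_order_acquire),
                std::memory_order_release);
    }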
