Diffstat (limited to 'test/CodeGen/X86/fold-and-shift.ll')
-rw-r--r-- | test/CodeGen/X86/fold-and-shift.ll | 82
1 file changed, 69 insertions(+), 13 deletions(-)
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 9f79f7723b33..93baa0e0eee0 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,21 +1,77 @@
-; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @t1(i8* %X, i32 %i) {
+; CHECK: t1:
+; CHECK-NOT: and
+; CHECK: movzbl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-  %tmp2 = shl i32 %i, 2		; <i32> [#uses=1]
-  %tmp4 = and i32 %tmp2, 1020		; <i32> [#uses=1]
-  %tmp7 = getelementptr i8* %X, i32 %tmp4		; <i8*> [#uses=1]
-  %tmp78 = bitcast i8* %tmp7 to i32*		; <i32*> [#uses=1]
-  %tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-  ret i32 %tmp9
+  %tmp2 = shl i32 %i, 2
+  %tmp4 = and i32 %tmp2, 1020
+  %tmp7 = getelementptr i8* %X, i32 %tmp4
+  %tmp78 = bitcast i8* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
 }
 
 define i32 @t2(i16* %X, i32 %i) {
+; CHECK: t2:
+; CHECK-NOT: and
+; CHECK: movzwl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %tmp2 = shl i32 %i, 1
+  %tmp4 = and i32 %tmp2, 131070
+  %tmp7 = getelementptr i16* %X, i32 %tmp4
+  %tmp78 = bitcast i16* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
+}
+
+define i32 @t3(i16* %i.ptr, i32* %arr) {
+; This case is tricky. The lshr followed by a gep will produce an lshr followed
+; by an and to remove the low bits. This can be simplified by doing the lshr by
+; a greater constant and using the addressing mode to scale the result back up.
+; To make matters worse, because of the two-phase zext of %i and the reuse of
+; both values in the function, the DAG can get confused trying to re-use them,
+; which prevents easy analysis of the mask needed to match this.
+; CHECK: t3:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %val.ptr = getelementptr inbounds i32* %arr, i32 %index
+  %val = load i32* %val.ptr
+  %sum = add i32 %val, %i.zext
+  ret i32 %sum
+}
+
+define i32 @t4(i16* %i.ptr, i32* %arr) {
+; A version of @t3 that has more zero extends and more re-use of intermediate
+; values. This exercises slightly different bits of canonicalization.
+; CHECK: t4:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-  %tmp2 = shl i32 %i, 1		; <i32> [#uses=1]
-  %tmp4 = and i32 %tmp2, 131070		; <i32> [#uses=1]
-  %tmp7 = getelementptr i16* %X, i32 %tmp4		; <i16*> [#uses=1]
-  %tmp78 = bitcast i16* %tmp7 to i32*		; <i32*> [#uses=1]
-  %tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-  ret i32 %tmp9
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %sum.1 = add i32 %val, %i.zext
+  %sum.2 = add i32 %sum.1, %index
+  ret i32 %sum.2
 }
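
For readers unfamiliar with the fold that the new @t3/@t4 tests target, the arithmetic behind the comment in @t3 is worth spelling out. The sketch below shows the two candidate lowerings; it is illustrative only (the register choices and exact instruction sequence are assumptions, not output of any particular llc revision; the tests only pin down the shrl and scaled-addl patterns).

    ; The byte offset for arr[i >> 11] is 4*(i >> 11) == (i >> 11) << 2,
    ; which DAG combining canonicalizes to (i >> 9) & ~3. A naive lowering
    ; materializes that mask with an explicit 'and':
    ;
    ;   shrl $9, %eax
    ;   andl $-4, %eax
    ;   addl (%ecx,%eax), %edx       # load arr byte offset, add to sum
    ;
    ; The lowering the tests demand instead keeps the shift at 11 and lets
    ; the scaled addressing mode re-apply the *4, so no 'and' is emitted:
    ;
    ;   shrl $11, %eax
    ;   addl (%ecx,%eax,4), %edx     # scale folded into the address

The CHECK-NOT: and lines reject the first form, while the addl (%{{...}},%{{...}},4), pattern requires the scaled addressing mode of the second.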