diff options
Diffstat (limited to 'test/CodeGen/aarch64-neon-ldst-one.c')
-rw-r--r-- | test/CodeGen/aarch64-neon-ldst-one.c | 1714 |
1 files changed, 270 insertions, 1444 deletions
diff --git a/test/CodeGen/aarch64-neon-ldst-one.c b/test/CodeGen/aarch64-neon-ldst-one.c index a3c5b140a0d2..592b2ceffa6a 100644 --- a/test/CodeGen/aarch64-neon-ldst-one.c +++ b/test/CodeGen/aarch64-neon-ldst-one.c @@ -300,58 +300,6 @@ poly64x1_t test_vld1_dup_p64(poly64_t *a) { return vld1_dup_p64(a); } -// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_dup_u8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint8x16x2_t [[TMP4]] -uint8x16x2_t test_vld2q_dup_u8(uint8_t *a) { - return vld2q_dup_u8(a); -} - -// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_dup_u16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x2_t [[TMP6]] -uint16x8x2_t test_vld2q_dup_u16(uint16_t *a) { - return vld2q_dup_u16(a); -} - -// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_dup_u32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x2_t [[TMP6]] -uint32x4x2_t test_vld2q_dup_u32(uint32_t *a) { - return vld2q_dup_u32(a); -} - // CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 @@ -363,65 +311,13 @@ uint32x4x2_t test_vld2q_dup_u32(uint32_t *a) { // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x2_t [[TMP6]] uint64x2x2_t test_vld2q_dup_u64(uint64_t *a) { return vld2q_dup_u64(a); } -// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int8x16x2_t [[TMP4]] -int8x16x2_t test_vld2q_dup_s8(int8_t *a) { - return vld2q_dup_s8(a); -} - -// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x2_t [[TMP6]] -int16x8x2_t test_vld2q_dup_s16(int16_t *a) { - return vld2q_dup_s16(a); -} - -// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x2_t [[TMP6]] -int32x4x2_t test_vld2q_dup_s32(int32_t *a) { - return vld2q_dup_s32(a); -} - // CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 @@ -433,49 +329,13 @@ int32x4x2_t test_vld2q_dup_s32(int32_t *a) { // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x2_t [[TMP6]] int64x2x2_t test_vld2q_dup_s64(int64_t *a) { return vld2q_dup_s64(a); } -// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_dup_f16(half* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* -// CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half> }* -// CHECK: store { <8 x half>, <8 x half> } [[VLD2]], { <8 x half>, <8 x half> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float16x8x2_t [[TMP6]] -float16x8x2_t test_vld2q_dup_f16(float16_t *a) { - return vld2q_dup_f16(a); -} - -// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* -// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x2_t [[TMP6]] -float32x4x2_t test_vld2q_dup_f32(float32_t *a) { - return vld2q_dup_f32(a); -} - // CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 @@ -487,47 +347,13 @@ float32x4x2_t test_vld2q_dup_f32(float32_t *a) { // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x2_t [[TMP6]] float64x2x2_t test_vld2q_dup_f64(float64_t *a) { return vld2q_dup_f64(a); } -// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_dup_p8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly8x16x2_t [[TMP4]] -poly8x16x2_t test_vld2q_dup_p8(poly8_t *a) { - return vld2q_dup_p8(a); -} - -// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_dup_p16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP6]] -poly16x8x2_t test_vld2q_dup_p16(poly16_t *a) { - return vld2q_dup_p16(a); -} - // CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 @@ -539,189 +365,13 @@ poly16x8x2_t test_vld2q_dup_p16(poly16_t *a) { // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x2_t [[TMP6]] poly64x2x2_t test_vld2q_dup_p64(poly64_t *a) { return vld2q_dup_p64(a); } -// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_dup_u8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint8x8x2_t [[TMP4]] -uint8x8x2_t test_vld2_dup_u8(uint8_t *a) { - return vld2_dup_u8(a); -} - -// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_dup_u16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x2_t [[TMP6]] -uint16x4x2_t test_vld2_dup_u16(uint16_t *a) { - return vld2_dup_u16(a); -} - -// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_dup_u32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x2_t [[TMP6]] -uint32x2x2_t test_vld2_dup_u32(uint32_t *a) { - return vld2_dup_u32(a); -} - -// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_dup_u64(i64* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* -// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint64x1x2_t [[TMP6]] -uint64x1x2_t test_vld2_dup_u64(uint64_t *a) { - return vld2_dup_u64(a); -} - -// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int8x8x2_t [[TMP4]] -int8x8x2_t test_vld2_dup_s8(int8_t *a) { - return vld2_dup_s8(a); -} - -// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP6]] -int16x4x2_t test_vld2_dup_s16(int16_t *a) { - return vld2_dup_s16(a); -} - -// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x2_t [[TMP6]] -int32x2x2_t test_vld2_dup_s32(int32_t *a) { - return vld2_dup_s32(a); -} - -// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* -// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int64x1x2_t [[TMP6]] -int64x1x2_t test_vld2_dup_s64(int64_t *a) { - return vld2_dup_s64(a); -} - -// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_dup_f16(half* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* -// CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half> }* -// CHECK: store { <4 x half>, <4 x half> } [[VLD2]], { <4 x half>, <4 x half> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float16x4x2_t [[TMP6]] -float16x4x2_t test_vld2_dup_f16(float16_t *a) { - return vld2_dup_f16(a); -} - -// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* -// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x2_t [[TMP6]] -float32x2x2_t test_vld2_dup_f32(float32_t *a) { - return vld2_dup_f32(a); -} - // CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 @@ -733,47 +383,13 @@ float32x2x2_t test_vld2_dup_f32(float32_t *a) { // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x2_t [[TMP6]] float64x1x2_t test_vld2_dup_f64(float64_t *a) { return vld2_dup_f64(a); } -// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_dup_p8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly8x8x2_t [[TMP4]] -poly8x8x2_t test_vld2_dup_p8(poly8_t *a) { - return vld2_dup_p8(a); -} - -// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_dup_p16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x2_t [[TMP6]] -poly16x4x2_t test_vld2_dup_p16(poly16_t *a) { - return vld2_dup_p16(a); -} - // CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 @@ -785,68 +401,13 @@ poly16x4x2_t test_vld2_dup_p16(poly16_t *a) { // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x2_t [[TMP6]] poly64x1x2_t test_vld2_dup_p64(poly64_t *a) { return vld2_dup_p64(a); } -// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_dup_u8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint8x16x3_t [[TMP4]] -uint8x16x3_t test_vld3q_dup_u8(uint8_t *a) { - return vld3q_dup_u8(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_dup_u16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x3_t [[TMP6]] -uint16x8x3_t test_vld3q_dup_u16(uint16_t *a) { - return vld3q_dup_u16(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_dup_u32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x3_t [[TMP6]] -uint32x4x3_t test_vld3q_dup_u32(uint32_t *a) { - return vld3q_dup_u32(a); - // [{{x[0-9]+|sp}}] -} - // CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 @@ -858,7 +419,7 @@ uint32x4x3_t test_vld3q_dup_u32(uint32_t *a) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x3_t [[TMP6]] uint64x2x3_t test_vld3q_dup_u64(uint64_t *a) { @@ -866,61 +427,6 @@ uint64x2x3_t test_vld3q_dup_u64(uint64_t *a) { // [{{x[0-9]+|sp}}] } -// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int8x16x3_t [[TMP4]] -int8x16x3_t test_vld3q_dup_s8(int8_t *a) { - return vld3q_dup_s8(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x3_t [[TMP6]] -int16x8x3_t test_vld3q_dup_s16(int16_t *a) { - return vld3q_dup_s16(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x3_t [[TMP6]] -int32x4x3_t test_vld3q_dup_s32(int32_t *a) { - return vld3q_dup_s32(a); - // [{{x[0-9]+|sp}}] -} - // CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 @@ -932,7 +438,7 @@ int32x4x3_t test_vld3q_dup_s32(int32_t *a) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x3_t [[TMP6]] int64x2x3_t test_vld3q_dup_s64(int64_t *a) { @@ -940,44 +446,6 @@ int64x2x3_t test_vld3q_dup_s64(int64_t *a) { // [{{x[0-9]+|sp}}] } -// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_dup_f16(half* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* -// CHECK: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half> }* -// CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float16x8x3_t [[TMP6]] -float16x8x3_t test_vld3q_dup_f16(float16_t *a) { - return vld3q_dup_f16(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* -// CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x3_t [[TMP6]] -float32x4x3_t test_vld3q_dup_f32(float32_t *a) { - return vld3q_dup_f32(a); - // [{{x[0-9]+|sp}}] -} - // CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 @@ -989,7 +457,7 @@ float32x4x3_t test_vld3q_dup_f32(float32_t *a) { // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x3_t [[TMP6]] float64x2x3_t test_vld3q_dup_f64(float64_t *a) { @@ -997,42 +465,6 @@ float64x2x3_t test_vld3q_dup_f64(float64_t *a) { // [{{x[0-9]+|sp}}] } -// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_dup_p8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly8x16x3_t [[TMP4]] -poly8x16x3_t test_vld3q_dup_p8(poly8_t *a) { - return vld3q_dup_p8(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_dup_p16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x3_t [[TMP6]] -poly16x8x3_t test_vld3q_dup_p16(poly16_t *a) { - return vld3q_dup_p16(a); - // [{{x[0-9]+|sp}}] -} - // CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 @@ -1044,7 +476,7 @@ poly16x8x3_t test_vld3q_dup_p16(poly16_t *a) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x3_t [[TMP6]] poly64x2x3_t test_vld3q_dup_p64(poly64_t *a) { @@ -1052,192 +484,6 @@ poly64x2x3_t test_vld3q_dup_p64(poly64_t *a) { // [{{x[0-9]+|sp}}] } -// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_dup_u8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint8x8x3_t [[TMP4]] -uint8x8x3_t test_vld3_dup_u8(uint8_t *a) { - return vld3_dup_u8(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_dup_u16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x3_t [[TMP6]] -uint16x4x3_t test_vld3_dup_u16(uint16_t *a) { - return vld3_dup_u16(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_dup_u32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x3_t [[TMP6]] -uint32x2x3_t test_vld3_dup_u32(uint32_t *a) { - return vld3_dup_u32(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_dup_u64(i64* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* -// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint64x1x3_t [[TMP6]] -uint64x1x3_t test_vld3_dup_u64(uint64_t *a) { - return vld3_dup_u64(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int8x8x3_t [[TMP4]] -int8x8x3_t test_vld3_dup_s8(int8_t *a) { - return vld3_dup_s8(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x3_t [[TMP6]] -int16x4x3_t test_vld3_dup_s16(int16_t *a) { - return vld3_dup_s16(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x3_t [[TMP6]] -int32x2x3_t test_vld3_dup_s32(int32_t *a) { - return vld3_dup_s32(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* -// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int64x1x3_t [[TMP6]] -int64x1x3_t test_vld3_dup_s64(int64_t *a) { - return vld3_dup_s64(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_dup_f16(half* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* -// CHECK: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half> }* -// CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float16x4x3_t [[TMP6]] -float16x4x3_t test_vld3_dup_f16(float16_t *a) { - return vld3_dup_f16(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* -// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x3_t [[TMP6]] -float32x2x3_t test_vld3_dup_f32(float32_t *a) { - return vld3_dup_f32(a); - // [{{x[0-9]+|sp}}] -} - // CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 @@ -1249,7 +495,7 @@ float32x2x3_t test_vld3_dup_f32(float32_t *a) { // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x3_t [[TMP6]] float64x1x3_t test_vld3_dup_f64(float64_t *a) { @@ -1257,42 +503,6 @@ float64x1x3_t test_vld3_dup_f64(float64_t *a) { // [{{x[0-9]+|sp}}] } -// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_dup_p8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly8x8x3_t [[TMP4]] -poly8x8x3_t test_vld3_dup_p8(poly8_t *a) { - return vld3_dup_p8(a); - // [{{x[0-9]+|sp}}] -} - -// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_dup_p16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x3_t [[TMP6]] -poly16x4x3_t test_vld3_dup_p16(poly16_t *a) { - return vld3_dup_p16(a); - // [{{x[0-9]+|sp}}] -} - // CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 @@ -1304,7 +514,7 @@ poly16x4x3_t test_vld3_dup_p16(poly16_t *a) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x3_t [[TMP6]] poly64x1x3_t test_vld3_dup_p64(poly64_t *a) { @@ -1312,58 +522,6 @@ poly64x1x3_t test_vld3_dup_p64(poly64_t *a) { // [{{x[0-9]+|sp}}] } -// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_dup_u8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint8x16x4_t [[TMP4]] -uint8x16x4_t test_vld4q_dup_u8(uint8_t *a) { - return vld4q_dup_u8(a); -} - -// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_dup_u16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x4_t [[TMP6]] -uint16x8x4_t test_vld4q_dup_u16(uint16_t *a) { - return vld4q_dup_u16(a); -} - -// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_dup_u32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x4_t [[TMP6]] -uint32x4x4_t test_vld4q_dup_u32(uint32_t *a) { - return vld4q_dup_u32(a); -} - // CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 @@ -1375,65 +533,13 @@ uint32x4x4_t test_vld4q_dup_u32(uint32_t *a) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] uint64x2x4_t test_vld4q_dup_u64(uint64_t *a) { return vld4q_dup_u64(a); } -// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int8x16x4_t [[TMP4]] -int8x16x4_t test_vld4q_dup_s8(int8_t *a) { - return vld4q_dup_s8(a); -} - -// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x4_t [[TMP6]] -int16x8x4_t test_vld4q_dup_s16(int16_t *a) { - return vld4q_dup_s16(a); -} - -// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x4_t [[TMP6]] -int32x4x4_t test_vld4q_dup_s32(int32_t *a) { - return vld4q_dup_s32(a); -} - // CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 @@ -1445,49 +551,13 @@ int32x4x4_t test_vld4q_dup_s32(int32_t *a) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x4_t [[TMP6]] int64x2x4_t test_vld4q_dup_s64(int64_t *a) { return vld4q_dup_s64(a); } -// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_dup_f16(half* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* -// CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* -// CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float16x8x4_t [[TMP6]] -float16x8x4_t test_vld4q_dup_f16(float16_t *a) { - return vld4q_dup_f16(a); -} - -// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* -// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x4_t [[TMP6]] -float32x4x4_t test_vld4q_dup_f32(float32_t *a) { - return vld4q_dup_f32(a); -} - // CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 @@ -1499,47 +569,13 @@ float32x4x4_t test_vld4q_dup_f32(float32_t *a) { // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x4_t [[TMP6]] float64x2x4_t test_vld4q_dup_f64(float64_t *a) { return vld4q_dup_f64(a); } -// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_dup_p8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly8x16x4_t [[TMP4]] -poly8x16x4_t test_vld4q_dup_p8(poly8_t *a) { - return vld4q_dup_p8(a); -} - -// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_dup_p16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x4_t [[TMP6]] -poly16x8x4_t test_vld4q_dup_p16(poly16_t *a) { - return vld4q_dup_p16(a); -} - // CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 @@ -1551,189 +587,13 @@ poly16x8x4_t test_vld4q_dup_p16(poly16_t *a) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x4_t [[TMP6]] poly64x2x4_t test_vld4q_dup_p64(poly64_t *a) { return vld4q_dup_p64(a); } -// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_dup_u8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint8x8x4_t [[TMP4]] -uint8x8x4_t test_vld4_dup_u8(uint8_t *a) { - return vld4_dup_u8(a); -} - -// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_dup_u16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x4_t [[TMP6]] -uint16x4x4_t test_vld4_dup_u16(uint16_t *a) { - return vld4_dup_u16(a); -} - -// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_dup_u32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x4_t [[TMP6]] -uint32x2x4_t test_vld4_dup_u32(uint32_t *a) { - return vld4_dup_u32(a); -} - -// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_dup_u64(i64* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* -// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint64x1x4_t [[TMP6]] -uint64x1x4_t test_vld4_dup_u64(uint64_t *a) { - return vld4_dup_u64(a); -} - -// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int8x8x4_t [[TMP4]] -int8x8x4_t test_vld4_dup_s8(int8_t *a) { - return vld4_dup_s8(a); -} - -// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x4_t [[TMP6]] -int16x4x4_t test_vld4_dup_s16(int16_t *a) { - return vld4_dup_s16(a); -} - -// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x4_t [[TMP6]] -int32x2x4_t test_vld4_dup_s32(int32_t *a) { - return vld4_dup_s32(a); -} - -// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* -// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int64x1x4_t [[TMP6]] -int64x1x4_t test_vld4_dup_s64(int64_t *a) { - return vld4_dup_s64(a); -} - -// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_dup_f16(half* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to half* -// CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* -// CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float16x4x4_t [[TMP6]] -float16x4x4_t test_vld4_dup_f16(float16_t *a) { - return vld4_dup_f16(a); -} - -// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* -// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x4_t [[TMP6]] -float32x2x4_t test_vld4_dup_f32(float32_t *a) { - return vld4_dup_f32(a); -} - // CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 @@ -1745,47 +605,13 @@ float32x2x4_t test_vld4_dup_f32(float32_t *a) { // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x4_t [[TMP6]] float64x1x4_t test_vld4_dup_f64(float64_t *a) { return vld4_dup_f64(a); } -// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_dup_p8(i8* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) -// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly8x8x4_t [[TMP4]] -poly8x8x4_t test_vld4_dup_p8(poly8_t *a) { - return vld4_dup_p8(a); -} - -// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_dup_p16(i16* %a) #0 { -// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 -// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* -// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) -// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x4_t [[TMP6]] -poly16x4x4_t test_vld4_dup_p16(poly16_t *a) { - return vld4_dup_p16(a); -} - // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #0 { // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 @@ -1797,7 +623,7 @@ poly16x4x4_t test_vld4_dup_p16(poly16_t *a) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false) // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x4_t [[TMP6]] poly64x1x4_t test_vld4_dup_p64(poly64_t *a) { @@ -2125,7 +951,7 @@ poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) { // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2138,7 +964,7 @@ poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) { // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x2_t [[TMP8]] int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { @@ -2154,7 +980,7 @@ int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2167,7 +993,7 @@ int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x2_t [[TMP8]] uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { @@ -2183,7 +1009,7 @@ uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2196,7 +1022,7 @@ uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 16 [[TMP7]], i64 32, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x2_t [[TMP8]] poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { @@ -2212,7 +1038,7 @@ poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2228,7 +1054,7 @@ poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x3_t [[TMP9]] int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { @@ -2244,7 +1070,7 @@ int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[SRC]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -2260,7 +1086,7 @@ int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x3_t [[TMP9]] uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { @@ -2276,7 +1102,7 @@ uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2294,7 +1120,7 @@ uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x2_t [[TMP13]] uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { @@ -2310,7 +1136,7 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 @@ -2328,7 +1154,7 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x2_t [[TMP13]] uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { @@ -2344,7 +1170,7 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2362,7 +1188,7 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x2_t [[TMP13]] uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { @@ -2378,7 +1204,7 @@ uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2396,7 +1222,7 @@ uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x2_t [[TMP13]] int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { @@ -2412,7 +1238,7 @@ int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 @@ -2430,7 +1256,7 @@ int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x2_t [[TMP13]] int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { @@ -2446,7 +1272,7 @@ int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2464,7 +1290,7 @@ int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x2_t [[TMP13]] int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { @@ -2480,7 +1306,7 @@ int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2498,7 +1324,7 @@ int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { // CHECK: store { <8 x half>, <8 x half> } [[VLD2_LANE]], { <8 x half>, <8 x half> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x2_t [[TMP13]] float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { @@ -2514,7 +1340,7 @@ float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 @@ -2532,7 +1358,7 @@ float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { // CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x2_t [[TMP13]] float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { @@ -2548,7 +1374,7 @@ float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2566,7 +1392,7 @@ float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { // CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], { <2 x double>, <2 x double> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x2_t [[TMP13]] float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { @@ -2582,7 +1408,7 @@ float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 @@ -2600,7 +1426,7 @@ float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x2_t [[TMP13]] poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { @@ -2616,7 +1442,7 @@ poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 @@ -2634,7 +1460,7 @@ poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP11]], i8* align 16 [[TMP12]], i64 32, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x2_t [[TMP13]] poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { @@ -2650,7 +1476,7 @@ poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -2663,7 +1489,7 @@ poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x2_t [[TMP8]] uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { @@ -2679,7 +1505,7 @@ uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 @@ -2697,7 +1523,7 @@ uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x2_t [[TMP13]] uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { @@ -2713,7 +1539,7 @@ uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 @@ -2731,7 +1557,7 @@ uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x2_t [[TMP13]] uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { @@ -2747,7 +1573,7 @@ uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 @@ -2765,7 +1591,7 @@ uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x2_t [[TMP13]] uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { @@ -2781,7 +1607,7 @@ uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -2794,7 +1620,7 @@ uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x2_t [[TMP8]] int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { @@ -2810,7 +1636,7 @@ int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 @@ -2828,7 +1654,7 @@ int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x2_t [[TMP13]] int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { @@ -2844,7 +1670,7 @@ int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 @@ -2862,7 +1688,7 @@ int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x2_t [[TMP13]] int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { @@ -2878,7 +1704,7 @@ int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 @@ -2896,7 +1722,7 @@ int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x2_t [[TMP13]] int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { @@ -2912,7 +1738,7 @@ int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 @@ -2930,7 +1756,7 @@ int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { // CHECK: store { <4 x half>, <4 x half> } [[VLD2_LANE]], { <4 x half>, <4 x half> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x2_t [[TMP13]] float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { @@ -2946,7 +1772,7 @@ float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 @@ -2964,7 +1790,7 @@ float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { // CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x2_t [[TMP13]] float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { @@ -2980,7 +1806,7 @@ float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 @@ -2998,7 +1824,7 @@ float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { // CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], { <1 x double>, <1 x double> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x2_t [[TMP13]] float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) { @@ -3014,7 +1840,7 @@ float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) { // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -3027,7 +1853,7 @@ float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) { // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 16, i1 false) // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x2_t [[TMP8]] poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) { @@ -3043,7 +1869,7 @@ poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) { // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 @@ -3061,7 +1887,7 @@ poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) { // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x2_t [[TMP13]] poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) { @@ -3077,7 +1903,7 @@ poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) { // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 @@ -3095,7 +1921,7 @@ poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) { // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x2_t [[TMP13]] poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) { @@ -3111,7 +1937,7 @@ poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) { // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3134,7 +1960,7 @@ poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) { // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x3_t [[TMP16]] uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) { @@ -3150,7 +1976,7 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) { // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 @@ -3173,7 +1999,7 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) { // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x3_t [[TMP16]] uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) { @@ -3189,7 +2015,7 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) { // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3212,7 +2038,7 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x3_t [[TMP16]] uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) { @@ -3228,7 +2054,7 @@ uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) { // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3251,7 +2077,7 @@ uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) { // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x3_t [[TMP16]] int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) { @@ -3267,7 +2093,7 @@ int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) { // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 @@ -3290,7 +2116,7 @@ int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) { // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x3_t [[TMP16]] int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) { @@ -3306,7 +2132,7 @@ int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) { // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3329,7 +2155,7 @@ int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x3_t [[TMP16]] int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) { @@ -3345,7 +2171,7 @@ int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) { // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3368,7 +2194,7 @@ int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) { // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], { <8 x half>, <8 x half>, <8 x half> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x3_t [[TMP16]] float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) { @@ -3384,7 +2210,7 @@ float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) { // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 @@ -3407,7 +2233,7 @@ float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) { // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x3_t [[TMP16]] float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) { @@ -3423,7 +2249,7 @@ float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) { // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3446,7 +2272,7 @@ float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) { // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x3_t [[TMP16]] float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) { @@ -3462,7 +2288,7 @@ float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) { // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -3478,7 +2304,7 @@ float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) { // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP7]], i8* align 16 [[TMP8]], i64 48, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x3_t [[TMP9]] poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) { @@ -3494,7 +2320,7 @@ poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) { // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 @@ -3517,7 +2343,7 @@ poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) { // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x3_t [[TMP16]] poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) { @@ -3533,7 +2359,7 @@ poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) { // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 @@ -3556,7 +2382,7 @@ poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP14]], i8* align 16 [[TMP15]], i64 48, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x3_t [[TMP16]] poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) { @@ -3572,7 +2398,7 @@ poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) { // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -3588,7 +2414,7 @@ poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) { // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x3_t [[TMP9]] uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) { @@ -3604,7 +2430,7 @@ uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) { // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 @@ -3627,7 +2453,7 @@ uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) { // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x3_t [[TMP16]] uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) { @@ -3643,7 +2469,7 @@ uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) { // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 @@ -3666,7 +2492,7 @@ uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) { // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x3_t [[TMP16]] uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) { @@ -3682,7 +2508,7 @@ uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) { // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 @@ -3705,7 +2531,7 @@ uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x3_t [[TMP16]] uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) { @@ -3721,7 +2547,7 @@ uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) { // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -3737,7 +2563,7 @@ uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) { // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x3_t [[TMP9]] int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) { @@ -3753,7 +2579,7 @@ int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) { // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 @@ -3776,7 +2602,7 @@ int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) { // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x3_t [[TMP16]] int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) { @@ -3792,7 +2618,7 @@ int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) { // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 @@ -3815,7 +2641,7 @@ int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) { // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x3_t [[TMP16]] int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) { @@ -3831,7 +2657,7 @@ int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) { // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 @@ -3854,7 +2680,7 @@ int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x3_t [[TMP16]] int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) { @@ -3870,7 +2696,7 @@ int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) { // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 @@ -3893,7 +2719,7 @@ int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) { // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], { <4 x half>, <4 x half>, <4 x half> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x3_t [[TMP16]] float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) { @@ -3909,7 +2735,7 @@ float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) { // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 @@ -3932,7 +2758,7 @@ float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) { // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x3_t [[TMP16]] float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) { @@ -3948,7 +2774,7 @@ float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) { // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 @@ -3971,7 +2797,7 @@ float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) { // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x3_t [[TMP16]] float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) { @@ -3987,7 +2813,7 @@ float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) { // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -4003,7 +2829,7 @@ float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) { // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 24, i1 false) // CHECK: [[TMP9:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x3_t [[TMP9]] poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) { @@ -4019,7 +2845,7 @@ poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) { // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 @@ -4042,7 +2868,7 @@ poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) { // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x3_t [[TMP16]] poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) { @@ -4058,7 +2884,7 @@ poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) { // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 @@ -4081,7 +2907,7 @@ poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x3_t [[TMP16]] poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { @@ -4097,7 +2923,7 @@ poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -4116,7 +2942,7 @@ poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint8x16x4_t [[TMP10]] uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { @@ -4132,7 +2958,7 @@ uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4160,7 +2986,7 @@ uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint16x8x4_t [[TMP19]] uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { @@ -4176,7 +3002,7 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 @@ -4204,7 +3030,7 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint32x4x4_t [[TMP19]] uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { @@ -4220,7 +3046,7 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4248,7 +3074,7 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.uint64x2x4_t [[TMP19]] uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { @@ -4264,7 +3090,7 @@ uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -4283,7 +3109,7 @@ uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int8x16x4_t [[TMP10]] int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { @@ -4299,7 +3125,7 @@ int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4327,7 +3153,7 @@ int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int16x8x4_t [[TMP19]] int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { @@ -4343,7 +3169,7 @@ int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 @@ -4371,7 +3197,7 @@ int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int32x4x4_t [[TMP19]] int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { @@ -4387,7 +3213,7 @@ int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4415,7 +3241,7 @@ int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.int64x2x4_t [[TMP19]] int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { @@ -4431,7 +3257,7 @@ int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4459,7 +3285,7 @@ int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], { <8 x half>, <8 x half>, <8 x half>, <8 x half> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float16x8x4_t [[TMP19]] float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { @@ -4475,7 +3301,7 @@ float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 @@ -4503,7 +3329,7 @@ float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float32x4x4_t [[TMP19]] float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { @@ -4519,7 +3345,7 @@ float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4547,7 +3373,7 @@ float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.float64x2x4_t [[TMP19]] float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { @@ -4563,7 +3389,7 @@ float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 @@ -4582,7 +3408,7 @@ float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP8]], i8* align 16 [[TMP9]], i64 64, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly8x16x4_t [[TMP10]] poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { @@ -4598,7 +3424,7 @@ poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 @@ -4626,7 +3452,7 @@ poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly16x8x4_t [[TMP19]] poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) { @@ -4642,7 +3468,7 @@ poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) { // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 @@ -4670,7 +3496,7 @@ poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) { // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP17]], i8* align 16 [[TMP18]], i64 64, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 // CHECK: ret %struct.poly64x2x4_t [[TMP19]] poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) { @@ -4686,7 +3512,7 @@ poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) { // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -4705,7 +3531,7 @@ poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) { // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint8x8x4_t [[TMP10]] uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) { @@ -4721,7 +3547,7 @@ uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) { // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 @@ -4749,7 +3575,7 @@ uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) { // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint16x4x4_t [[TMP19]] uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) { @@ -4765,7 +3591,7 @@ uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) { // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 @@ -4793,7 +3619,7 @@ uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) { // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint32x2x4_t [[TMP19]] uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) { @@ -4809,7 +3635,7 @@ uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) { // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 @@ -4837,7 +3663,7 @@ uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.uint64x1x4_t [[TMP19]] uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) { @@ -4853,7 +3679,7 @@ uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) { // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -4872,7 +3698,7 @@ uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) { // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int8x8x4_t [[TMP10]] int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) { @@ -4888,7 +3714,7 @@ int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) { // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 @@ -4916,7 +3742,7 @@ int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) { // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int16x4x4_t [[TMP19]] int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) { @@ -4932,7 +3758,7 @@ int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) { // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 @@ -4960,7 +3786,7 @@ int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) { // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int32x2x4_t [[TMP19]] int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) { @@ -4976,7 +3802,7 @@ int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) { // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 @@ -5004,7 +3830,7 @@ int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.int64x1x4_t [[TMP19]] int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) { @@ -5020,7 +3846,7 @@ int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) { // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 @@ -5048,7 +3874,7 @@ int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) { // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], { <4 x half>, <4 x half>, <4 x half>, <4 x half> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float16x4x4_t [[TMP19]] float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) { @@ -5064,7 +3890,7 @@ float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) { // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 @@ -5092,7 +3918,7 @@ float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) { // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float32x2x4_t [[TMP19]] float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) { @@ -5108,7 +3934,7 @@ float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) { // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 @@ -5136,7 +3962,7 @@ float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) { // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.float64x1x4_t [[TMP19]] float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) { @@ -5152,7 +3978,7 @@ float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) { // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 @@ -5171,7 +3997,7 @@ float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) { // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 32, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly8x8x4_t [[TMP10]] poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) { @@ -5187,7 +4013,7 @@ poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) { // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 @@ -5215,7 +4041,7 @@ poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) { // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly16x4x4_t [[TMP19]] poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) { @@ -5231,7 +4057,7 @@ poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) { // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 @@ -5259,7 +4085,7 @@ poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) { // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 32, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 // CHECK: ret %struct.poly64x1x4_t [[TMP19]] poly64x1x4_t test_vld4_lane_p64(poly64_t *a, poly64x1x4_t b) { @@ -5585,7 +4411,7 @@ void test_vst1_lane_p64(poly64_t *a, poly64x1_t b) { // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -5605,7 +4431,7 @@ void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) { // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -5630,7 +4456,7 @@ void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) { // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -5655,7 +4481,7 @@ void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) { // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -5680,7 +4506,7 @@ void test_vst2q_lane_u64(uint64_t *a, uint64x2x2_t b) { // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -5700,7 +4526,7 @@ void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) { // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -5725,7 +4551,7 @@ void test_vst2q_lane_s16(int16_t *a, int16x8x2_t b) { // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -5750,7 +4576,7 @@ void test_vst2q_lane_s32(int32_t *a, int32x4x2_t b) { // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -5775,7 +4601,7 @@ void test_vst2q_lane_s64(int64_t *a, int64x2x2_t b) { // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -5800,7 +4626,7 @@ void test_vst2q_lane_f16(float16_t *a, float16x8x2_t b) { // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -5825,7 +4651,7 @@ void test_vst2q_lane_f32(float32_t *a, float32x4x2_t b) { // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -5850,7 +4676,7 @@ void test_vst2q_lane_f64(float64_t *a, float64x2x2_t b) { // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -5870,7 +4696,7 @@ void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) { // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -5895,7 +4721,7 @@ void test_vst2q_lane_p16(poly16_t *a, poly16x8x2_t b) { // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -5920,7 +4746,7 @@ void test_vst2q_lane_p64(poly64_t *a, poly64x2x2_t b) { // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -5940,7 +4766,7 @@ void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) { // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -5965,7 +4791,7 @@ void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) { // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -5990,7 +4816,7 @@ void test_vst2_lane_u32(uint32_t *a, uint32x2x2_t b) { // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6015,7 +4841,7 @@ void test_vst2_lane_u64(uint64_t *a, uint64x1x2_t b) { // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6035,7 +4861,7 @@ void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) { // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6060,7 +4886,7 @@ void test_vst2_lane_s16(int16_t *a, int16x4x2_t b) { // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -6085,7 +4911,7 @@ void test_vst2_lane_s32(int32_t *a, int32x2x2_t b) { // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6110,7 +4936,7 @@ void test_vst2_lane_s64(int64_t *a, int64x1x2_t b) { // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -6135,7 +4961,7 @@ void test_vst2_lane_f16(float16_t *a, float16x4x2_t b) { // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -6160,7 +4986,7 @@ void test_vst2_lane_f32(float32_t *a, float32x2x2_t b) { // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -6185,7 +5011,7 @@ void test_vst2_lane_f64(float64_t *a, float64x1x2_t b) { // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6205,7 +5031,7 @@ void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) { // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6230,7 +5056,7 @@ void test_vst2_lane_p16(poly16_t *a, poly16x4x2_t b) { // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6255,7 +5081,7 @@ void test_vst2_lane_p64(poly64_t *a, poly64x1x2_t b) { // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -6278,7 +5104,7 @@ void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) { // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -6308,7 +5134,7 @@ void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) { // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -6338,7 +5164,7 @@ void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) { // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -6368,7 +5194,7 @@ void test_vst3q_lane_u64(uint64_t *a, uint64x2x3_t b) { // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -6391,7 +5217,7 @@ void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) { // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -6421,7 +5247,7 @@ void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) { // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -6451,7 +5277,7 @@ void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) { // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -6481,7 +5307,7 @@ void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) { // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -6511,7 +5337,7 @@ void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) { // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -6541,7 +5367,7 @@ void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) { // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -6571,7 +5397,7 @@ void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) { // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -6594,7 +5420,7 @@ void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) { // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -6624,7 +5450,7 @@ void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) { // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -6654,7 +5480,7 @@ void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) { // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6677,7 +5503,7 @@ void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) { // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6707,7 +5533,7 @@ void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) { // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -6737,7 +5563,7 @@ void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) { // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6767,7 +5593,7 @@ void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) { // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6790,7 +5616,7 @@ void test_vst3_lane_s8(int8_t *a, int8x8x3_t b) { // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -6820,7 +5646,7 @@ void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) { // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -6850,7 +5676,7 @@ void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) { // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -6880,7 +5706,7 @@ void test_vst3_lane_s64(int64_t *a, int64x1x3_t b) { // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -6910,7 +5736,7 @@ void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) { // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -6940,7 +5766,7 @@ void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) { // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -6970,7 +5796,7 @@ void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) { // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -6993,7 +5819,7 @@ void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) { // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7023,7 +5849,7 @@ void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) { // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -7053,7 +5879,7 @@ void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) { // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -7079,7 +5905,7 @@ void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) { // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -7114,7 +5940,7 @@ void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) { // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -7149,7 +5975,7 @@ void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) { // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -7184,7 +6010,7 @@ void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) { // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -7210,7 +6036,7 @@ void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) { // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -7245,7 +6071,7 @@ void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) { // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 @@ -7280,7 +6106,7 @@ void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) { // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -7315,7 +6141,7 @@ void test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) { // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 @@ -7350,7 +6176,7 @@ void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) { // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 @@ -7385,7 +6211,7 @@ void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) { // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 @@ -7420,7 +6246,7 @@ void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) { // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 @@ -7446,7 +6272,7 @@ void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) { // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 @@ -7481,7 +6307,7 @@ void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) { // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 @@ -7516,7 +6342,7 @@ void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) { // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -7542,7 +6368,7 @@ void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) { // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7577,7 +6403,7 @@ void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) { // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -7612,7 +6438,7 @@ void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) { // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -7647,7 +6473,7 @@ void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) { // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -7673,7 +6499,7 @@ void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) { // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7708,7 +6534,7 @@ void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) { // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 @@ -7743,7 +6569,7 @@ void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) { // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 @@ -7778,7 +6604,7 @@ void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) { // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 @@ -7813,7 +6639,7 @@ void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) { // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 @@ -7848,7 +6674,7 @@ void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) { // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 @@ -7883,7 +6709,7 @@ void test_vst4_lane_f64(float64_t *a, float64x1x4_t b) { // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 @@ -7909,7 +6735,7 @@ void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) { // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 @@ -7944,7 +6770,7 @@ void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) { // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false) // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 |