databases/caterva/files/patch-contribs_c-blosc2_blosc_shuffle-altivec.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

--- contribs/c-blosc2/blosc/shuffle-altivec.c.orig	2023-03-23 20:25:30 UTC
+++ contribs/c-blosc2/blosc/shuffle-altivec.c
@@ -25,7 +25,7 @@ shuffle2_altivec(uint8_t* const dest, const uint8_t* c
                  const int32_t vectorizable_elements, const int32_t total_elements){
   static const int32_t bytesoftype = 2;
   uint32_t i, j;
-  __vector uint8_t xmm0[2];
+  __vector unsigned char xmm0[2];
 
   for (j = 0; j < vectorizable_elements; j += 16){
     /* Fetch 16 elements (32 bytes) */
@@ -47,7 +47,7 @@ shuffle4_altivec(uint8_t* const dest, const uint8_t* c
                  const int32_t vectorizable_elements, const int32_t total_elements){
   static const int32_t bytesoftype = 4;
   int32_t i, j;
-  __vector uint8_t xmm0[4];
+  __vector unsigned char xmm0[4];
 
   for (j = 0; j < vectorizable_elements; j += 16)
   {
@@ -73,7 +73,7 @@ shuffle8_altivec(uint8_t* const dest, const uint8_t* c
                  const int32_t vectorizable_elements, const int32_t total_elements) {
   static const uint8_t bytesoftype = 8;
   int32_t i, j;
-  __vector uint8_t xmm0[8];
+  __vector unsigned char xmm0[8];
 
   for (j = 0; j < vectorizable_elements; j += 16)
   {
@@ -96,7 +96,7 @@ shuffle16_altivec(uint8_t* const dest, const uint8_t* 
                   const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 16;
   int32_t i, j;
-  __vector uint8_t xmm0[16];
+  __vector unsigned char xmm0[16];
 
   for (j = 0; j < vectorizable_elements; j += 16)
   {
@@ -121,7 +121,7 @@ shuffle16_tiled_altivec(uint8_t* const dest, const uin
                         const int32_t bytesoftype) {
   int32_t j, k;
   const int32_t vecs_per_el_rem = bytesoftype & 0xF;
-  __vector uint8_t xmm[16];
+  __vector unsigned char xmm[16];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Advance the offset into the type by the vector size (in bytes), unless this is
@@ -152,7 +152,7 @@ unshuffle2_altivec(uint8_t* const dest, const uint8_t*
                    const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 2;
   uint32_t i, j;
-  __vector uint8_t xmm0[2], xmm1[2];
+  __vector unsigned char xmm0[2], xmm1[2];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Load 16 elements (32 bytes) into 2 vectors registers. */
@@ -176,7 +176,7 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
                    const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 4;
   uint32_t i, j;
-  __vector uint8_t xmm0[4], xmm1[4];
+  __vector unsigned char xmm0[4], xmm1[4];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Load 16 elements (64 bytes) into 4 vectors registers. */
@@ -191,11 +191,11 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
     /* Shuffle 2-byte words */
     for (i = 0; i < 2; i++) {
       /* Compute the low 32 bytes */
-      xmm0[i] = (__vector uint8_t) vec_vmrghh((__vector uint16_t)xmm1[i * 2],
-                                              (__vector uint16_t) xmm1[i * 2 + 1]);
+      xmm0[i] = (__vector unsigned char) vec_vmrghh((__vector unsigned short)xmm1[i * 2],
+                                              (__vector unsigned short) xmm1[i * 2 + 1]);
       /* Compute the hi 32 bytes */
-      xmm0[i+2] = (__vector uint8_t) vec_vmrglh((__vector uint16_t)xmm1[i * 2],
-                                                (__vector uint16_t)xmm1[i * 2 + 1]);
+      xmm0[i+2] = (__vector unsigned char) vec_vmrglh((__vector unsigned short)xmm1[i * 2],
+                                                (__vector unsigned short)xmm1[i * 2 + 1]);
     }
     /* Store the result vectors in proper order */
     vec_xst(xmm0[0], bytesoftype * j, dest);
@@ -211,7 +211,7 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
                    const int32_t vectorizable_elements, const int32_t total_elements) {
   static const uint8_t bytesoftype = 8;
   uint32_t i, j;
-  __vector uint8_t xmm0[8], xmm1[8];
+  __vector unsigned char xmm0[8], xmm1[8];
 
   // Initialize permutations for writing
   for (j = 0; j < vectorizable_elements; j += 16) {
@@ -225,17 +225,17 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
     }
     /* Shuffle 2-byte words */
     for (i = 0; i < 4; i++) {
-      xmm0[i] = (__vector uint8_t)vec_vmrghh((__vector uint16_t)xmm1[i * 2],
-                                             (__vector uint16_t)xmm1[i * 2 + 1]);
-      xmm0[4 + i] = (__vector uint8_t)vec_vmrglh((__vector uint16_t)xmm1[i * 2],
-                                                 (__vector uint16_t)xmm1[i * 2 + 1]);
+      xmm0[i] = (__vector unsigned char)vec_vmrghh((__vector unsigned short)xmm1[i * 2],
+                                             (__vector unsigned short)xmm1[i * 2 + 1]);
+      xmm0[4 + i] = (__vector unsigned char)vec_vmrglh((__vector unsigned short)xmm1[i * 2],
+                                                 (__vector unsigned short)xmm1[i * 2 + 1]);
     }
     /* Shuffle 4-byte dwords */
     for (i = 0; i < 4; i++) {
-      xmm1[i] = (__vector uint8_t)vec_vmrghw((__vector uint32_t)xmm0[i * 2],
-                                             (__vector uint32_t)xmm0[i * 2 + 1]);
-      xmm1[4 + i] = (__vector uint8_t)vec_vmrglw((__vector uint32_t)xmm0[i * 2],
-                                                 (__vector uint32_t)xmm0[i * 2 + 1]);
+      xmm1[i] = (__vector unsigned char)vec_vmrghw((__vector unsigned int)xmm0[i * 2],
+                                             (__vector unsigned int)xmm0[i * 2 + 1]);
+      xmm1[4 + i] = (__vector unsigned char)vec_vmrglw((__vector unsigned int)xmm0[i * 2],
+                                                 (__vector unsigned int)xmm0[i * 2 + 1]);
     }
     /* Store the result vectors in proper order */
     vec_xst(xmm1[0], bytesoftype * j, dest);
@@ -256,7 +256,7 @@ unshuffle16_altivec(uint8_t* const dest, const uint8_t
                     const int32_t vectorizable_elements, const int32_t total_elements) {
   static const int32_t bytesoftype = 16;
   uint32_t i, j;
-  __vector uint8_t xmm0[16];
+  __vector unsigned char xmm0[16];
 
   for (j = 0; j < vectorizable_elements; j += 16) {
     /* Load 16 elements (64 bytes) into 4 vectors registers. */
@@ -280,7 +280,7 @@ unshuffle16_tiled_altivec(uint8_t* const dest, const u
                           const int32_t bytesoftype) {
   int32_t i, j, offset_into_type;
   const int32_t vecs_per_el_rem = bytesoftype &  0xF;
-  __vector uint8_t xmm[16];
+  __vector unsigned char xmm[16];
 
 
   /* Advance the offset into the type by the vector size (in bytes), unless this is