81 #ifndef INCLUDED_volk_32i_x2_and_32i_a_H
82 #define INCLUDED_volk_32i_x2_and_32i_a_H
87 #ifdef LV_HAVE_AVX512F
88 #include <immintrin.h>
90 static inline void volk_32i_x2_and_32i_a_avx512f(int32_t* cVector,
91 const int32_t* aVector,
92 const int32_t* bVector,
93 unsigned int num_points)
95 unsigned int number = 0;
96 const unsigned int sixteenthPoints = num_points / 16;
98 int32_t* cPtr = (int32_t*)cVector;
99 const int32_t* aPtr = (int32_t*)aVector;
100 const int32_t* bPtr = (int32_t*)bVector;
102 __m512i aVal, bVal, cVal;
103 for (; number < sixteenthPoints; number++) {
105 aVal = _mm512_load_si512(aPtr);
106 bVal = _mm512_load_si512(bPtr);
108 cVal = _mm512_and_si512(aVal, bVal);
110 _mm512_store_si512(cPtr, cVal);
117 number = sixteenthPoints * 16;
118 for (; number < num_points; number++) {
119 cVector[number] = aVector[number] & bVector[number];
125 #include <immintrin.h>
127 static inline void volk_32i_x2_and_32i_a_avx2(int32_t* cVector,
128 const int32_t* aVector,
129 const int32_t* bVector,
130 unsigned int num_points)
132 unsigned int number = 0;
133 const unsigned int oneEightPoints = num_points / 8;
135 int32_t* cPtr = cVector;
136 const int32_t* aPtr = aVector;
137 const int32_t* bPtr = bVector;
139 __m256i aVal, bVal, cVal;
140 for (; number < oneEightPoints; number++) {
142 aVal = _mm256_load_si256((__m256i*)aPtr);
143 bVal = _mm256_load_si256((__m256i*)bPtr);
145 cVal = _mm256_and_si256(aVal, bVal);
147 _mm256_store_si256((__m256i*)cPtr,
155 number = oneEightPoints * 8;
156 for (; number < num_points; number++) {
157 cVector[number] = aVector[number] & bVector[number];
164 #include <xmmintrin.h>
/*!
 * \brief Element-wise bitwise AND of two int32 vectors (SSE, aligned).
 *
 * \param cVector    Output buffer; receives aVector[i] & bVector[i].
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of 32-bit elements to process.
 *
 * The integer data is moved through __m128 float registers and combined with
 * _mm_and_ps, which is a plain bitwise AND of the 128-bit lanes. The vector
 * loop uses _mm_load_ps / _mm_store_ps, which require 16-byte aligned
 * addresses.
 */
static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector,
                                             const int32_t* aVector,
                                             const int32_t* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    /* Reinterpret the buffers as float only to feed the SSE load/store
     * intrinsics; no floating-point arithmetic is performed. */
    float* cPtr = (float*)cVector;
    const float* aPtr = (const float*)aVector;
    const float* bPtr = (const float*)bVector;

    __m128 aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm_load_ps(aPtr);
        bVal = _mm_load_ps(bPtr);

        cVal = _mm_and_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Step to the next group of 4 elements. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        cVector[number] = aVector[number] & bVector[number];
    }
}
202 #include <arm_neon.h>
/*!
 * \brief Element-wise bitwise AND of two int32 vectors (ARM NEON).
 *
 * \param cVector    Output buffer; receives aVector[i] & bVector[i].
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of 32-bit elements to process.
 */
static inline void volk_32i_x2_and_32i_neon(int32_t* cVector,
                                            const int32_t* aVector,
                                            const int32_t* bVector,
                                            unsigned int num_points)
{
    int32_t* cPtr = cVector;
    const int32_t* aPtr = aVector;
    const int32_t* bPtr = bVector;
    unsigned int number = 0;
    unsigned int quarter_points = num_points / 4;

    int32x4_t a_val, b_val, c_val;

    for (number = 0; number < quarter_points; number++) {
        a_val = vld1q_s32(aPtr);
        b_val = vld1q_s32(bPtr);
        c_val = vandq_s32(a_val, b_val);
        vst1q_s32(cPtr, c_val);

        /* Step to the next group of 4 elements; the scalar tail below
         * continues from wherever these pointers end up. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 elements. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) & (*bPtr++);
    }
}
234 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise bitwise AND of two int32 vectors (portable C).
 *
 * \param cVector    Output buffer; receives aVector[i] & bVector[i].
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of 32-bit elements to process.
 */
static inline void volk_32i_x2_and_32i_generic(int32_t* cVector,
                                               const int32_t* aVector,
                                               const int32_t* bVector,
                                               unsigned int num_points)
{
    int32_t* cPtr = cVector;
    const int32_t* aPtr = aVector;
    const int32_t* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) & (*bPtr++);
    }
}
/* Implemented outside this header; only declared here. */
extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector,
                                           const int32_t* aVector,
                                           const int32_t* bVector,
                                           unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards all arguments unchanged to
 *        volk_32i_x2_and_32i_a_orc_impl.
 *
 * \param cVector    Output buffer, passed through.
 * \param aVector    First input buffer, passed through.
 * \param bVector    Second input buffer, passed through.
 * \param num_points Element count, passed through.
 */
static inline void volk_32i_x2_and_32i_u_orc(int32_t* cVector,
                                             const int32_t* aVector,
                                             const int32_t* bVector,
                                             unsigned int num_points)
{
    volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, num_points);
}
272 #ifndef INCLUDED_volk_32i_x2_and_32i_u_H
273 #define INCLUDED_volk_32i_x2_and_32i_u_H
275 #include <inttypes.h>
278 #ifdef LV_HAVE_AVX512F
279 #include <immintrin.h>
281 static inline void volk_32i_x2_and_32i_u_avx512f(int32_t* cVector,
282 const int32_t* aVector,
283 const int32_t* bVector,
284 unsigned int num_points)
286 unsigned int number = 0;
287 const unsigned int sixteenthPoints = num_points / 16;
289 int32_t* cPtr = (int32_t*)cVector;
290 const int32_t* aPtr = (int32_t*)aVector;
291 const int32_t* bPtr = (int32_t*)bVector;
293 __m512i aVal, bVal, cVal;
294 for (; number < sixteenthPoints; number++) {
296 aVal = _mm512_loadu_si512(aPtr);
297 bVal = _mm512_loadu_si512(bPtr);
299 cVal = _mm512_and_si512(aVal, bVal);
301 _mm512_storeu_si512(cPtr, cVal);
308 number = sixteenthPoints * 16;
309 for (; number < num_points; number++) {
310 cVector[number] = aVector[number] & bVector[number];
316 #include <immintrin.h>
318 static inline void volk_32i_x2_and_32i_u_avx2(int32_t* cVector,
319 const int32_t* aVector,
320 const int32_t* bVector,
321 unsigned int num_points)
323 unsigned int number = 0;
324 const unsigned int oneEightPoints = num_points / 8;
326 int32_t* cPtr = cVector;
327 const int32_t* aPtr = aVector;
328 const int32_t* bPtr = bVector;
330 __m256i aVal, bVal, cVal;
331 for (; number < oneEightPoints; number++) {
333 aVal = _mm256_loadu_si256((__m256i*)aPtr);
334 bVal = _mm256_loadu_si256((__m256i*)bPtr);
336 cVal = _mm256_and_si256(aVal, bVal);
338 _mm256_storeu_si256((__m256i*)cPtr,
346 number = oneEightPoints * 8;
347 for (; number < num_points; number++) {
348 cVector[number] = aVector[number] & bVector[number];
static void volk_32i_x2_and_32i_a_sse(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
Definition: volk_32i_x2_and_32i.h:166
static void volk_32i_x2_and_32i_generic(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
Definition: volk_32i_x2_and_32i.h:236
static void volk_32i_x2_and_32i_neon(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
Definition: volk_32i_x2_and_32i.h:204