53 #ifndef INCLUDED_volk_16i_convert_8i_u_H
54 #define INCLUDED_volk_16i_convert_8i_u_H
60 #include <immintrin.h>
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (AVX2, unaligned loads/stores).
 *
 * Every output element is (int8_t)(input >> 8). The vector loop handles 32
 * samples per iteration; any remainder is converted by the scalar tail loop.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_u_avx2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    const unsigned int thirtysecondPoints = num_points / 32;
    const int16_t* inputPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < thirtysecondPoints; number++) {
        /* Two loads of 16 samples each; the pointer has to advance between
         * them, otherwise the same 16 samples would be converted twice. */
        __m256i inputVal1 = _mm256_loadu_si256((const __m256i*)inputPtr);
        inputPtr += 16;
        __m256i inputVal2 = _mm256_loadu_si256((const __m256i*)inputPtr);
        inputPtr += 16;

        /* Arithmetic shift leaves each lane in [-128, 127], so the saturating
         * pack below never actually clamps. */
        inputVal1 = _mm256_srai_epi16(inputVal1, 8);
        inputVal2 = _mm256_srai_epi16(inputVal2, 8);

        __m256i ret = _mm256_packs_epi16(inputVal1, inputVal2);
        /* packs works per 128-bit lane, yielding quadwords in the order
         * [a.lo, b.lo, a.hi, b.hi]; 0xD8 (quadwords 0,2,1,3) restores
         * sample order. */
        ret = _mm256_permute4x64_epi64(ret, 0xD8);

        _mm256_storeu_si256((__m256i*)outputVectorPtr, ret);
        outputVectorPtr += 32;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = thirtysecondPoints * 32; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
103 #include <emmintrin.h>
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (SSE2, unaligned loads/stores).
 *
 * Every output element is (int8_t)(input >> 8). The vector loop handles 16
 * samples per iteration; any remainder is converted by the scalar tail loop.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const int16_t* inputPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < sixteenthPoints; number++) {
        /* Two loads of 8 samples each; the pointer has to advance between
         * them, otherwise the same 8 samples would be converted twice. */
        __m128i inputVal1 = _mm_loadu_si128((const __m128i*)inputPtr);
        inputPtr += 8;
        __m128i inputVal2 = _mm_loadu_si128((const __m128i*)inputPtr);
        inputPtr += 8;

        /* Arithmetic shift leaves each lane in [-128, 127], so the saturating
         * pack below never actually clamps. */
        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        _mm_storeu_si128((__m128i*)outputVectorPtr,
                         _mm_packs_epi16(inputVal1, inputVal2));
        outputVectorPtr += 16;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
144 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (portable scalar implementation).
 *
 * Every output element is (int8_t)(input >> 8). Right-shifting a negative
 * value is implementation-defined in C; this relies on the usual arithmetic
 * (sign-preserving) shift.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_generic(int8_t* outputVector,
                                               const int16_t* inputVector,
                                               unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ >> 8);
    }
}
162 #ifndef INCLUDED_volk_16i_convert_8i_a_H
163 #define INCLUDED_volk_16i_convert_8i_a_H
165 #include <inttypes.h>
169 #include <immintrin.h>
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (AVX2, aligned loads/stores).
 *
 * Every output element is (int8_t)(input >> 8). The vector loop handles 32
 * samples per iteration; any remainder is converted by the scalar tail loop.
 * The aligned load/store intrinsics used here require both buffers to be
 * 32-byte aligned.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_avx2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    const unsigned int thirtysecondPoints = num_points / 32;
    const int16_t* inputPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < thirtysecondPoints; number++) {
        /* Two loads of 16 samples each; the pointer has to advance between
         * them, otherwise the same 16 samples would be converted twice. */
        __m256i inputVal1 = _mm256_load_si256((const __m256i*)inputPtr);
        inputPtr += 16;
        __m256i inputVal2 = _mm256_load_si256((const __m256i*)inputPtr);
        inputPtr += 16;

        /* Arithmetic shift leaves each lane in [-128, 127], so the saturating
         * pack below never actually clamps. */
        inputVal1 = _mm256_srai_epi16(inputVal1, 8);
        inputVal2 = _mm256_srai_epi16(inputVal2, 8);

        __m256i ret = _mm256_packs_epi16(inputVal1, inputVal2);
        /* packs works per 128-bit lane, yielding quadwords in the order
         * [a.lo, b.lo, a.hi, b.hi]; 0xD8 (quadwords 0,2,1,3) restores
         * sample order. */
        ret = _mm256_permute4x64_epi64(ret, 0xD8);

        _mm256_store_si256((__m256i*)outputVectorPtr, ret);
        outputVectorPtr += 32;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = thirtysecondPoints * 32; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
212 #include <emmintrin.h>
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (SSE2, aligned loads/stores).
 *
 * Every output element is (int8_t)(input >> 8). The vector loop handles 16
 * samples per iteration; any remainder is converted by the scalar tail loop.
 * The aligned load/store intrinsics used here require both buffers to be
 * 16-byte aligned.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const int16_t* inputPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < sixteenthPoints; number++) {
        /* Two loads of 8 samples each; the pointer has to advance between
         * them, otherwise the same 8 samples would be converted twice. */
        __m128i inputVal1 = _mm_load_si128((const __m128i*)inputPtr);
        inputPtr += 8;
        __m128i inputVal2 = _mm_load_si128((const __m128i*)inputPtr);
        inputPtr += 8;

        /* Arithmetic shift leaves each lane in [-128, 127], so the saturating
         * pack below never actually clamps. */
        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        _mm_store_si128((__m128i*)outputVectorPtr,
                        _mm_packs_epi16(inputVal1, inputVal2));
        outputVectorPtr += 16;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
254 #include <arm_neon.h>
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (ARM NEON).
 *
 * Every output element is (int8_t)(input >> 8). The vector loop handles 16
 * samples per iteration using a narrowing right shift; any remainder is
 * converted by the scalar tail loop.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_neon(int8_t* outputVector,
                                            const int16_t* inputVector,
                                            unsigned int num_points)
{
    const unsigned int sixteenth_points = num_points / 16;
    const int16_t* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < sixteenth_points; number++) {
        /* Load two vectors of 8 samples each. */
        const int16x8_t inputVal0 = vld1q_s16(inputVectorPtr);
        const int16x8_t inputVal1 = vld1q_s16(inputVectorPtr + 8);

        /* Shift right by 8 and narrow each lane to 8 bits. */
        const int8x8_t outputVal0 = vshrn_n_s16(inputVal0, 8);
        const int8x8_t outputVal1 = vshrn_n_s16(inputVal1, 8);

        /* Join both halves and write 16 results at once. */
        vst1q_s8(outputVectorPtr, vcombine_s8(outputVal0, outputVal1));
        inputVectorPtr += 16;
        outputVectorPtr += 16;
    }

    /* Scalar tail; both pointers already sit just past the vectorised part. */
    for (number = sixteenth_points * 16; number < num_points; number++) {
        *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ >> 8);
    }
}
292 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 16-bit signed samples to 8-bit by keeping the high byte
 *        (portable scalar implementation, aligned-kernel entry).
 *
 * Every output element is (int8_t)(input >> 8). Right-shifting a negative
 * value is implementation-defined in C; this relies on the usual arithmetic
 * (sign-preserving) shift. The code itself places no alignment requirement
 * on either buffer.
 *
 * \param outputVector destination buffer, receives num_points int8_t values
 * \param inputVector  source buffer holding num_points int16_t values
 * \param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector,
                                                 const int16_t* inputVector,
                                                 unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ >> 8);
    }
}
static void volk_16i_convert_8i_a_sse2(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:214
static void volk_16i_convert_8i_u_sse2(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:105
static void volk_16i_convert_8i_a_generic(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:294
static void volk_16i_convert_8i_neon(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:256
static void volk_16i_convert_8i_generic(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:146