71 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
72 #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
78 #include <immintrin.h>
/*
 * Aligned AVX2 kernel. As listed, each vector iteration loads eight floats
 * through complexVector, keeps the ones at even positions (0, 2, 4, 6),
 * widens them to double and stores four doubles at iBufferPtr.
 *
 * iBuffer:    destination for the double-precision values.
 * num_points: number of points to process; the vector loop handles them
 *             four at a time and a scalar loop handles the remainder.
 *
 * NOTE(review): the embedded line numbers skip values (81, 83, 85, 88,
 * 90-92, ...), so this listing omits lines. Not visible here: the
 * declaration of the complexVector parameter, the declarations of
 * cplxValue / fVal / dVal, the braces, and any advance of iBufferPtr inside
 * the vector loop. Confirm against the complete header.
 */
80 static inline void volk_32fc_deinterleave_real_64f_a_avx2(
double* iBuffer,
82 unsigned int num_points)
84 unsigned int number = 0;
/* View the input as a flat array of floats. */
86 const float* complexVectorPtr = (
float*)complexVector;
87 double* iBufferPtr = iBuffer;
/* Number of full 4-point (8-float) groups. */
89 const unsigned int quarterPoints = num_points / 4;
/* Permutation indices: output lanes 0..3 take input floats 0, 2, 4, 6;
 * the upper four lanes are unused filler (index 0). */
93 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
94 for (; number < quarterPoints; number++) {
/* _mm256_load_ps is the aligned load: the source must be 32-byte aligned. */
96 cplxValue = _mm256_load_ps(complexVectorPtr);
97 complexVectorPtr += 8;
/* Gather the even-position floats into the low 128 bits, take that half,
 * convert its four floats to four doubles, and store them (aligned store). */
100 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
101 fVal = _mm256_extractf128_ps(cplxValue, 0);
102 dVal = _mm256_cvtps_pd(fVal);
103 _mm256_store_pd(iBufferPtr, dVal);
/* Scalar tail for the num_points % 4 points left after the vector loop. */
108 number = quarterPoints * 4;
109 for (; number < num_points; number++) {
/* NOTE(review): as listed, the source pointer advances one float per point,
 * while the vector loop consumes two floats per point. Embedded line 111 is
 * not shown, so a second advance may simply be missing from this listing --
 * confirm. */
110 *iBufferPtr++ = (double)*complexVectorPtr++;
117 #include <emmintrin.h>
/*
 * Aligned SSE2 kernel; only the tail of its signature is visible here.
 * Presumably this is volk_32fc_deinterleave_real_64f_a_sse2, which the
 * cross-reference entry at the end of this listing places at line 119 --
 * TODO confirm.
 *
 * As listed, each vector iteration loads four floats, keeps floats 0 and 2,
 * widens them to double and stores two doubles at iBufferPtr.
 *
 * NOTE(review): the embedded line numbers skip values, so lines are missing.
 * Not visible: the function name and the iBuffer / complexVector parameters,
 * the declaration of dVal, the braces, and any advance of iBufferPtr inside
 * the vector loop. Confirm against the complete header.
 */
121 unsigned int num_points)
123 unsigned int number = 0;
/* View the input as a flat array of floats. */
125 const float* complexVectorPtr = (
float*)complexVector;
126 double* iBufferPtr = iBuffer;
/* Number of full 2-point (4-float) groups. */
128 const unsigned int halfPoints = num_points / 2;
129 __m128 cplxValue, fVal;
131 for (; number < halfPoints; number++) {
/* _mm_load_ps is the aligned load: the source must be 16-byte aligned. */
133 cplxValue = _mm_load_ps(complexVectorPtr);
134 complexVectorPtr += 4;
/* Shuffle to {f0, f2, f0, f2}; _mm_cvtps_pd then converts the low two
 * floats (f0, f2) to doubles, which are written with an aligned store. */
137 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
138 dVal = _mm_cvtps_pd(fVal);
139 _mm_store_pd(iBufferPtr, dVal);
/* Scalar tail for the point left over when num_points is odd. */
144 number = halfPoints * 2;
145 for (; number < num_points; number++) {
/* NOTE(review): as listed, the source pointer advances one float per point,
 * while the vector loop consumes two floats per point. Embedded line 147 is
 * not shown, so a second advance may be missing from this listing --
 * confirm. */
146 *iBufferPtr++ = (double)*complexVectorPtr++;
152 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel; only the tail of its signature is visible here.
 * Presumably this is volk_32fc_deinterleave_real_64f_generic, which the
 * cross-reference entry at the end of this listing places at line 154 --
 * TODO confirm.
 *
 * As listed, it performs num_points float-to-double conversions, reading
 * through complexVector and writing through iBuffer.
 *
 * NOTE(review): the embedded line numbers skip values, so lines are missing
 * (function name, the iBuffer / complexVector parameters, braces).
 */
156 unsigned int num_points)
158 unsigned int number = 0;
/* View the input as a flat array of floats. */
159 const float* complexVectorPtr = (
float*)complexVector;
160 double* iBufferPtr = iBuffer;
161 for (number = 0; number < num_points; number++) {
/* NOTE(review): as listed, consecutive floats are copied. The SIMD kernels
 * in this file keep only every second float, so an extra source-pointer
 * advance (embedded line 163, not shown) is probably missing from this
 * listing -- confirm. */
162 *iBufferPtr++ = (double)*complexVectorPtr++;
168 #ifdef LV_HAVE_NEONV8
169 #include <arm_neon.h>
/*
 * ARMv8 NEON kernel. As listed, each vector iteration de-interleaves eight
 * floats with a 4-way structured load, widens lanes val[0] and val[2] to
 * double, and writes the result with a 2-way interleaving store.
 *
 * iBuffer:    destination for the double-precision values.
 * num_points: number of points to process; the vector loop handles them
 *             four at a time and a scalar loop handles the remainder.
 *
 * NOTE(review): the embedded line numbers skip values, so lines are missing.
 * Not visible: the complexVector parameter, the declarations of iVal1 /
 * iVal2 / iVal, the statements that fill iVal from iVal1 and iVal2, the
 * braces, and any advance of iBufferPtr in the vector loop. Confirm against
 * the complete header.
 */
171 static inline void volk_32fc_deinterleave_real_64f_neon(
double* iBuffer,
173 unsigned int num_points)
175 unsigned int number = 0;
/* Number of full 4-point (8-float) groups. */
176 unsigned int quarter_points = num_points / 4;
/* View the input as a flat array of floats. */
177 const float* complexVectorPtr = (
float*)complexVector;
178 double* iBufferPtr = iBuffer;
179 float32x2x4_t complexInput;
184 for (number = 0; number < quarter_points; number++) {
/* vld4_f32 splits 8 floats by stride 4: val[0] = {f0, f4}, val[2] = {f2, f6}. */
186 complexInput = vld4_f32(complexVectorPtr);
/* Widen the two selected lane pairs from float to double. */
189 iVal1 = vcvt_f64_f32(complexInput.val[0]);
190 iVal2 = vcvt_f64_f32(complexInput.val[2]);
/* Interleaving store of the two vectors held in iVal (its assembly from
 * iVal1 / iVal2 is not visible in this listing). */
195 vst2q_f64(iBufferPtr, iVal);
199 complexVectorPtr += 8;
/* Scalar tail for the num_points % 4 points left after the vector loop. */
202 for (number = quarter_points * 4; number < num_points; number++) {
/* NOTE(review): as listed, the source pointer advances one float per point,
 * while the vector loop consumes two floats per point. Embedded line 204 is
 * not shown, so a second advance may be missing from this listing --
 * confirm. */
203 *iBufferPtr++ = (double)*complexVectorPtr++;
211 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
212 #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
214 #include <inttypes.h>
218 #include <immintrin.h>
/*
 * Unaligned AVX2 kernel. As listed, each vector iteration loads eight floats
 * through complexVector, keeps the ones at even positions (0, 2, 4, 6),
 * widens them to double and stores four doubles at iBufferPtr. It uses the
 * unaligned load/store intrinsics, so neither pointer needs 32-byte
 * alignment.
 *
 * iBuffer:    destination for the double-precision values.
 * num_points: number of points to process; the vector loop handles them
 *             four at a time and a scalar loop handles the remainder.
 *
 * NOTE(review): the embedded line numbers skip values, so this listing omits
 * lines. Not visible here: the declaration of the complexVector parameter,
 * the declarations of cplxValue / fVal / dVal, the braces, and any advance
 * of iBufferPtr inside the vector loop. Confirm against the complete header.
 */
220 static inline void volk_32fc_deinterleave_real_64f_u_avx2(
double* iBuffer,
222 unsigned int num_points)
224 unsigned int number = 0;
/* View the input as a flat array of floats. */
226 const float* complexVectorPtr = (
float*)complexVector;
227 double* iBufferPtr = iBuffer;
/* Number of full 4-point (8-float) groups. */
229 const unsigned int quarterPoints = num_points / 4;
/* Permutation indices: output lanes 0..3 take input floats 0, 2, 4, 6;
 * the upper four lanes are unused filler (index 0). */
233 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
234 for (; number < quarterPoints; number++) {
/* Unaligned 256-bit load of eight floats. */
236 cplxValue = _mm256_loadu_ps(complexVectorPtr);
237 complexVectorPtr += 8;
/* Gather the even-position floats into the low 128 bits, take that half,
 * convert its four floats to four doubles, and store them (unaligned). */
240 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
241 fVal = _mm256_extractf128_ps(cplxValue, 0);
242 dVal = _mm256_cvtps_pd(fVal);
243 _mm256_storeu_pd(iBufferPtr, dVal);
/* Scalar tail for the num_points % 4 points left after the vector loop. */
248 number = quarterPoints * 4;
249 for (; number < num_points; number++) {
/* NOTE(review): as listed, the source pointer advances one float per point,
 * while the vector loop consumes two floats per point. Embedded line 251 is
 * not shown, so a second advance may be missing from this listing --
 * confirm. */
250 *iBufferPtr++ = (double)*complexVectorPtr++;
static void volk_32fc_deinterleave_real_64f_a_sse2(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:119
static void volk_32fc_deinterleave_real_64f_generic(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:154
float complex lv_32fc_t
Definition: volk_complex.h:65