Main MRPT website > C++ reference for MRPT 1.4.0
CImage_SSE2.cpp
Go to the documentation of this file.
1/* +---------------------------------------------------------------------------+
2 | Mobile Robot Programming Toolkit (MRPT) |
3 | http://www.mrpt.org/ |
4 | |
5 | Copyright (c) 2005-2016, Individual contributors, see AUTHORS file |
6 | See: http://www.mrpt.org/Authors - All rights reserved. |
7 | Released under BSD License. See details in http://www.mrpt.org/License |
8 +---------------------------------------------------------------------------+ */
9
10#include "base-precomp.h" // Precompiled headers
11
12#if MRPT_HAS_SSE2
13// ---------------------------------------------------------------------------
14// This file contains the SSE2 optimized functions for mrpt::utils::CImage
15// See the sources and the doxygen documentation page "sse_optimizations" for more details.
16//
17// Some functions here are derived from sources in libcvd, released
18// under LGPL. See http://mi.eng.cam.ac.uk/~er258/cvd/
19//
20// ---------------------------------------------------------------------------
21
22#include <mrpt/utils/CImage.h>
25#include "CImage_SSEx.h"
26
27/** \addtogroup sse_optimizations
28 * SSE optimized functions
29 * @{
30 */
31
/** Decimate a grayscale image by 2 in each direction: of every 2x2 pixel block only the
 *  top-left pixel is kept, the other 3 are discarded (no filtering).
 *  - <b>Input format:</b> uint8_t, 1 channel
 *  - <b>Output format:</b> uint8_t, 1 channel
 *  - <b>Preconditions:</b> in & out aligned to 16bytes, w = k*16 (w=width in pixels), widthStep=w*1
 *  - <b>Notes:</b> Writes (w/2) x (h/2) bytes to \a out, packed without row padding.
 *  - <b>Requires:</b> SSE2
 *  - <b>Invoked from:</b> mrpt::utils::CImage::scaleHalf()
 *
 *  \param in  First pixel of the source image.
 *  \param out First pixel of the destination image.
 *  \param w   Source width, in pixels.
 *  \param h   Source height, in pixels.
 */
void image_SSE2_scale_half_1c8u(const uint8_t* in, uint8_t* out, int w, int h)
{
	// 0x00FF in each 16-bit lane: keeps the even-indexed byte of every pixel pair.
	const __m128i evenPixelMask = _mm_set1_epi16(0x00FF);

	const int chunksPerRow = w >> 4; // One 128-bit register holds 16 source pixels
	const int outputRows = h >> 1;

	const uint8_t* src = in;
	uint8_t* dst = out;

	for (int row = 0; row < outputRows; ++row)
	{
		for (int chunk = 0; chunk < chunksPerRow; ++chunk)
		{
			// Aligned load of 16 pixels, zeroing the odd-indexed ones.
			const __m128i loaded = _mm_load_si128(reinterpret_cast<const __m128i*>(src));
			const __m128i kept = _mm_and_si128(loaded, evenPixelMask);

			// Each 16-bit lane now holds one pixel (<=255): narrow to bytes and
			// store the low 8 bytes of the packed result.
			const __m128i packed = _mm_packus_epi16(kept, kept);
			_mm_storel_epi64(reinterpret_cast<__m128i*>(dst), packed);

			src += 16;
			dst += 8;
		}

		// The odd source row is not sampled: jump over it.
		src += w;
	}
}
60
61
/** Halve a grayscale image in each direction, replacing every 2x2 pixel block by its average.
 *  - <b>Input format:</b> uint8_t, 1 channel
 *  - <b>Output format:</b> uint8_t, 1 channel
 *  - <b>Preconditions:</b> in & out aligned to 16bytes, w = k*16 (w=width in pixels), widthStep=w*1
 *  - <b>Notes:</b> The average is computed in two rounding steps: first the two rows are
 *     averaged (_mm_avg_epu8), then the two resulting columns (_mm_avg_epu16).
 *     Writes (w/2) x (h/2) bytes to \a out, packed without row padding.
 *  - <b>Requires:</b> SSE2
 *  - <b>Invoked from:</b> mrpt::utils::CImage::scaleHalfSmooth()
 *
 *  \param in  First pixel of the source image.
 *  \param out First pixel of the destination image.
 *  \param w   Source width, in pixels.
 *  \param h   Source height, in pixels.
 */
void image_SSE2_scale_half_smooth_1c8u(const uint8_t* in, uint8_t* out, int w, int h)
{
	// 0x00FF in each 16-bit lane: isolates the even-indexed byte of every pixel pair.
	const __m128i lowByteMask = _mm_set1_epi16(0x00FF);

	const int chunksPerRow = w >> 4; // One 128-bit register holds 16 source pixels
	const int outputRows = h >> 1;

	const uint8_t* upperRow = in;
	const uint8_t* lowerRow = in + w;
	uint8_t* dst = out;

	for (int row = 0; row < outputRows; ++row)
	{
		for (int chunk = 0; chunk < chunksPerRow; ++chunk)
		{
			// Vertical step: average 16 pixels of the upper row with the 16 below them.
			const __m128i upper = _mm_load_si128(reinterpret_cast<const __m128i*>(upperRow));
			const __m128i lower = _mm_load_si128(reinterpret_cast<const __m128i*>(lowerRow));
			const __m128i vertical = _mm_avg_epu8(upper, lower);

			// Horizontal step: widen even and odd columns to 16-bit lanes and average them.
			const __m128i evenColumns = _mm_and_si128(vertical, lowByteMask);
			const __m128i oddColumns = _mm_srli_epi16(vertical, 8);
			const __m128i averaged = _mm_avg_epu16(evenColumns, oddColumns);

			// Narrow the 8 results back to bytes and store them.
			_mm_storel_epi64(reinterpret_cast<__m128i*>(dst), _mm_packus_epi16(averaged, averaged));

			upperRow += 16;
			lowerRow += 16;
			dst += 8;
		}

		// Both cursors have consumed one row each; skip the row the other one just read.
		upperRow += w;
		lowerRow += w;
	}
}
98
99
100
101/** KLT score at a given point of a grayscale image.
102 * - <b>Requires:</b> SSE2
103 * - <b>Invoked from:</b> mrpt::utils::CImage::KLT_response()
104 *
105 * This function is not manually optimized for SSE2 but templatized for different
106 * window sizes so that the compiler can optimize automatically for each size.
107 *
108 * These templates are instantiated only for the most common window sizes (W=[2-16] and W=32),
109 * falling back to
110 * a generic implementation otherwise. The next figure shows the performance (time for
111 * KLT_response() to compute the score for one single pixel) for different window sizes.
112 *
113 * <img src="KLT_response_performance_SSE2.png" >
114 *
115 */
117
118// TODO:
119// Sum of absolute differences: Use _mm_sad_epu8
120
121/** @} */
122
123#endif // end if MRPT_HAS_SSE2
float KLT_response_optimized()
KLT score at a given point of a grayscale image.
void image_SSE2_scale_half_1c8u(const uint8_t *in, uint8_t *out, int w, int h)
Subsample each 2x2 pixel block into 1x1 pixel, taking the first pixel & ignoring the other 3.
Definition: CImage_SSE2.cpp:40
void image_SSE2_scale_half_smooth_1c8u(const uint8_t *in, uint8_t *out, int w, int h)
Average each 2x2 pixels into 1x1 pixel (arithmetic average)
Definition: CImage_SSE2.cpp:70
#define MRPT_ALIGN16
Definition: mrpt_macros.h:92



Page generated by Doxygen 1.9.2 for MRPT 1.4.0 SVN: at Mon Sep 20 00:21:40 UTC 2021