20 #if !defined(__MITSUBA_CORE_SSEMATH_H_)
21 #define __MITSUBA_CORE_SSEMATH_H_
84 FINLINE
void transpose_ps(__m128& row0, __m128& row1,
85 __m128& row2, __m128& row3) {
86 __m128 tmp3, tmp2, tmp1, tmp0;
87 tmp0 = _mm_unpacklo_ps(row0, row1);
88 tmp2 = _mm_unpacklo_ps(row2, row3);
89 tmp1 = _mm_unpackhi_ps(row0, row1);
90 tmp3 = _mm_unpackhi_ps(row2, row3);
92 row0 = _mm_movelh_ps(tmp0, tmp2);
93 row1 = _mm_movehl_ps(tmp2, tmp0);
94 row2 = _mm_movelh_ps(tmp1, tmp3);
95 row3 = _mm_movehl_ps(tmp3, tmp1);
/**
 * \brief Clamp each of the four lanes of \c x to the range
 * [\c minVal, \c maxVal] of the corresponding lanes.
 *
 * The upper bound is applied first and the lower bound last, so in a
 * lane where \c minVal exceeds \c maxVal the result is \c minVal.
 *
 * \param x      Values to clamp
 * \param minVal Per-lane lower bounds
 * \param maxVal Per-lane upper bounds
 * \return The clamped values
 */
inline __m128 clamp_ps(__m128 x, __m128 minVal, __m128 maxVal) {
    const __m128 upperBounded = _mm_min_ps(x, maxVal);
    return _mm_max_ps(upperBounded, minVal);
}
/**
 * \brief Horizontal sum: add up the four lanes of \c vec.
 *
 * The lanes are combined as (v0 + v2) + (v1 + v3).
 *
 * \param vec Four packed single-precision values
 * \return The sum of all four lanes
 */
inline float hsum_ps(__m128 vec) {
    // Swap the 64-bit halves and add: every lane now holds v[i] + v[i^2].
    const __m128 halvesSwapped = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1,0,3,2));
    const __m128 pairSums = _mm_add_ps(vec, halvesSwapped);

    // Swap neighbouring lanes and add: every lane now holds the full sum.
    const __m128 neighboursSwapped =
        _mm_shuffle_ps(pairSums, pairSums, _MM_SHUFFLE(2,3,0,1));
    const __m128 total = _mm_add_ps(pairSums, neighboursSwapped);

    return _mm_cvtss_f32(total);
}
/**
 * \brief Horizontal maximum: return the largest of the four lanes of \c vec.
 *
 * \param vec Four packed single-precision values
 * \return The maximum over all four lanes
 */
inline float hmax_ps(__m128 vec) {
    // Swap the 64-bit halves: every lane now holds max(v[i], v[i^2]).
    const __m128 halvesSwapped = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1,0,3,2));
    const __m128 pairMax = _mm_max_ps(vec, halvesSwapped);

    // Swap neighbouring lanes: every lane now holds the overall maximum.
    const __m128 neighboursSwapped =
        _mm_shuffle_ps(pairMax, pairMax, _MM_SHUFFLE(2,3,0,1));
    const __m128 overallMax = _mm_max_ps(pairMax, neighboursSwapped);

    return _mm_cvtss_f32(overallMax);
}
/**
 * \brief Horizontal minimum: return the smallest of the four lanes of \c vec.
 *
 * \param vec Four packed single-precision values
 * \return The minimum over all four lanes
 */
inline float hmin_ps(__m128 vec) {
    // Swap the 64-bit halves: every lane now holds min(v[i], v[i^2]).
    const __m128 halvesSwapped = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1,0,3,2));
    const __m128 pairMin = _mm_min_ps(vec, halvesSwapped);

    // Swap neighbouring lanes: every lane now holds the overall minimum.
    const __m128 neighboursSwapped =
        _mm_shuffle_ps(pairMin, pairMin, _MM_SHUFFLE(2,3,0,1));
    const __m128 overallMin = _mm_min_ps(pairMin, neighboursSwapped);

    return _mm_cvtss_f32(overallMin);
}
#define MTS_EXPORT_CORE
Definition: getopt.h:29