20 #if !defined(__MITSUBA_CORE_SSE_H_)
21 #define __MITSUBA_CORE_SSE_H_
/* Alignment specifiers for 16-, 32- and 64-byte alignment, spelled with the
 * __attribute__((aligned(N))) syntax. The condition that selects this branch
 * is not visible in this excerpt. */
27 #define MM_ALIGN16 __attribute__ ((aligned (16)))
28 #define MM_ALIGN32 __attribute__ ((aligned (32)))
29 #define MM_ALIGN64 __attribute__ ((aligned (64)))
30 #elif defined(__MSVC__)
/* Same three specifiers spelled with __declspec(align(N)); chosen when
 * __MSVC__ is defined. */
31 #define MM_ALIGN16 __declspec(align(16))
32 #define MM_ALIGN32 __declspec(align(32))
33 #define MM_ALIGN64 __declspec(align(64))
35 #error Unsupported compiler!
/*
 * Round an address up to the next 16-, 32- or 64-byte boundary.
 *
 * Each macro converts its argument to size_t, adds (alignment - 1), clears
 * the low bits of the sum, and yields the result as a float pointer. An
 * argument that already sits on the boundary comes back unchanged; otherwise
 * the result lies up to (alignment - 1) bytes past the argument, so the
 * caller's buffer has to provide that much slack.
 */
#define STACK_ALIGN16(addr) \
    reinterpret_cast<float *>( \
        (reinterpret_cast<size_t>(addr) + 0x0F) & ~static_cast<size_t>(0x0F))
#define STACK_ALIGN32(addr) \
    reinterpret_cast<float *>( \
        (reinterpret_cast<size_t>(addr) + 0x1F) & ~static_cast<size_t>(0x1F))
#define STACK_ALIGN64(addr) \
    reinterpret_cast<float *>( \
        (reinterpret_cast<size_t>(addr) + 0x3F) & ~static_cast<size_t>(0x3F))
/* No-op variants of the floating-point exception helpers: enable and disable
 * expand to nothing and query expands to the constant 0.
 * NOTE(review): presumably selected when SSE support is compiled out - the
 * enclosing conditional is not visible in this excerpt; confirm. */
43 #define enable_fpexcept_sse()
44 #define query_fpexcept_sse() 0
45 #define disable_fpexcept_sse()
48 #include <emmintrin.h>
52 # pragma intrinsic(__rdtsc)
/*
 * Broadcast one 32-bit lane of a vector into all four lanes.
 *
 *   ps  source vector (__m128 for splat_ps, __m128i for splat_epi32);
 *       expanded twice by splat_ps, so it should have no side effects
 *   i   lane index in 0..3; it becomes part of the shuffle immediate and
 *       therefore has to be a compile-time constant
 *
 * The index is replicated into each of the four 2-bit selector fields of the
 * shuffle immediate. Every use of `i` is parenthesized so that an expression
 * argument such as `a + b` is shifted as a whole; without the parentheses
 * `a + b << 6` would parse as `a + (b << 6)` and pick the wrong lanes.
 */
#define splat_ps(ps, i) \
    _mm_shuffle_ps((ps), (ps), (((i) << 6) | ((i) << 4) | ((i) << 2) | (i)))
#define splat_epi32(ps, i) \
    _mm_shuffle_epi32((ps), (((i) << 6) | ((i) << 4) | ((i) << 2) | (i)))
/*
 * Bitwise select between two vectors.
 *
 * For every bit that is set in `mask` the result takes the corresponding bit
 * of `a`; for every clear bit it takes the bit of `b`, i.e.
 * (mask & a) | (~mask & b). `mask` is expanded twice, so it should be free
 * of side effects.
 *
 * mux_ps works on __m128 values, mux_epi32 on __m128i values.
 */
#define mux_ps(mask, a, b) \
    _mm_or_ps(_mm_andnot_ps((mask), (b)), _mm_and_ps((mask), (a)))
#define mux_epi32(mask, a, b) \
    _mm_or_si128(_mm_andnot_si128((mask), (b)), _mm_and_si128((mask), (a)))
/*
 * Helpers for the SSE exception mask, restricted to the invalid-operation
 * and division-by-zero bits.
 *
 *   enable_fpexcept_sse()   clears both mask bits (the exceptions become
 *                           unmasked), leaving all other bits as they were
 *   query_fpexcept_sse()    yields the subset of the two bits that is
 *                           currently cleared in the mask; 0 means both
 *                           exceptions are masked
 *   disable_fpexcept_sse()  sets both mask bits again
 */
#define enable_fpexcept_sse() \
    _MM_SET_EXCEPTION_MASK( \
        _MM_GET_EXCEPTION_MASK() & ~(_MM_MASK_DIV_ZERO | _MM_MASK_INVALID))
#define query_fpexcept_sse() \
    (~_MM_GET_EXCEPTION_MASK() & (_MM_MASK_DIV_ZERO | _MM_MASK_INVALID))
#define disable_fpexcept_sse() \
    _MM_SET_EXCEPTION_MASK( \
        _MM_GET_EXCEPTION_MASK() | (_MM_MASK_DIV_ZERO | _MM_MASK_INVALID))
/* Broadcast a 32-bit integer into all four lanes of an __m128i: the value is
 * moved into lane 0 and lane 0 is then shuffled into every position. */
#define load1_epi32(value) \
    _mm_shuffle_epi32(_mm_cvtsi32_si128(value), 0)

/* XOR a packed-float vector with SSEConstants::negation_mask.ps.
 * NOTE(review): the mask value is defined outside this excerpt; presumably
 * it holds the sign bits so the XOR flips the sign of each lane - confirm. */
#define negate_ps(vec) \
    _mm_xor_ps(SSEConstants::negation_mask.ps, (vec))

/* Reinterpret the 128 bits of a packed-float vector as a packed-integer
 * vector and back, using the cast intrinsics. */
#define pstoepi32(vec) _mm_castps_si128(vec)
#define epi32tops(vec) _mm_castsi128_ps(vec)
68 #ifndef SINGLE_PRECISION
69 #error SSE2 only supported with single precision
84 struct {
float f0,f1,f2,f3; };
85 struct { int32_t i0,i1,i2,i3; };
86 struct {
uint32_t ui0,ui1,ui2,ui3; };
91 explicit SSEVector(__m128 ps)
95 explicit SSEVector(
float f0,
float f1,
float f2,
float f3)
96 : f0(f0), f1(f1), f2(f2), f3(f3) {
99 explicit SSEVector(
float f) : f0(f), f1(f), f2(f), f3(f) {}
101 explicit SSEVector(int32_t i0, int32_t i1, int32_t i2, int32_t i3)
102 : i0(i0), i1(i1), i2(i2), i3(i3) {
105 explicit SSEVector(int32_t i) : i0(i), i1(i), i2(i), i3(i) {}
108 : ui0(ui0), ui1(ui1), ui2(ui2), ui3(ui3) {
111 explicit SSEVector(
uint32_t ui) : ui0(ui), ui1(ui), ui2(ui), ui3(ui) {}
113 inline SSEVector &operator=(
const SSEVector &vec) {
/* Shared constant vectors. Each one is declared static const with the
 * MM_ALIGN16 alignment macro and has type SSEVector. The initial values are
 * defined outside this excerpt, so the names are the only hint at the
 * contents - TODO confirm against the definitions. */
126 static const MM_ALIGN16 SSEVector zero;
128 static const MM_ALIGN16 SSEVector one;
130 static const MM_ALIGN16 SSEVector max;
132 static const MM_ALIGN16 SSEVector eps;
134 static const MM_ALIGN16 SSEVector op_eps;
136 static const MM_ALIGN16 SSEVector om_eps;
138 static const MM_ALIGN16 SSEVector p_inf;
140 static const MM_ALIGN16 SSEVector n_inf;
142 static const MM_ALIGN16 SSEVector ffffffff;
/* NOTE(review): presumably the constant that negate_ps() XORs into its
 * argument through SSEConstants::negation_mask.ps - confirm. */
144 static const MM_ALIGN16 SSEVector negation_mask;
/* QuadVector is an array of three SSEVector values.
 * NOTE(review): presumably one SSEVector per coordinate axis, each holding
 * that coordinate for four packed items - confirm against the callers. */
148 typedef SSEVector QuadVector[3];
151 inline void _mm_debug_ps(
const char *desc, __m128 value) {
153 _mm_storeu_ps(dest, value);
154 printf(
"%s: [%f, %f, %f, %f]\n", desc, dest[0], dest[1], dest[2], dest[3]);
#define MTS_EXPORT_CORE
Definition: getopt.h:29