Mitsuba Renderer  0.5.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sse.h
Go to the documentation of this file.
1 /*
2  This file is part of Mitsuba, a physically based rendering system.
3 
4  Copyright (c) 2007-2014 by Wenzel Jakob and others.
5 
6  Mitsuba is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License Version 3
8  as published by the Free Software Foundation.
9 
10  Mitsuba is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #pragma once
20 #if !defined(__MITSUBA_CORE_SSE_H_)
21 #define __MITSUBA_CORE_SSE_H_
22 
23 #include <mitsuba/core/platform.h>
24 #include <stdio.h>
25 
/* Compiler-specific spelling of a 16/32/64-byte alignment attribute.
   NOTE(review): __MSVC__ is not a macro predefined by the compiler itself;
   presumably it is supplied by mitsuba/core/platform.h -- confirm. */
26 #if defined(__GNUC__)
27 #define MM_ALIGN16 __attribute__ ((aligned (16)))
28 #define MM_ALIGN32 __attribute__ ((aligned (32)))
29 #define MM_ALIGN64 __attribute__ ((aligned (64)))
30 #elif defined(__MSVC__)
31 #define MM_ALIGN16 __declspec(align(16))
32 #define MM_ALIGN32 __declspec(align(32))
33 #define MM_ALIGN64 __declspec(align(64))
34 #else
35 #error Unsupported compiler!
36 #endif
/* Round the address held in pointer t up to the next multiple of 16/32/64
   bytes and yield it as a float pointer. An already aligned address is
   returned unchanged; otherwise the result lies up to 15/31/63 bytes past t,
   so the underlying buffer has to include that much slack. */
37 #define STACK_ALIGN16(t) reinterpret_cast<float *>((reinterpret_cast<size_t>(t)+0x0F) & ~(size_t) 0x0F)
38 #define STACK_ALIGN32(t) reinterpret_cast<float *>((reinterpret_cast<size_t>(t)+0x1F) & ~(size_t) 0x1F)
39 #define STACK_ALIGN64(t) reinterpret_cast<float *>((reinterpret_cast<size_t>(t)+0x3F) & ~(size_t) 0x3F)
40 
41 /* ========= SSE intrinsics ========= */
/* Without MTS_SSE the floating point exception helpers are stubs:
   enable/disable expand to nothing and the query always evaluates to 0. */
42 #ifndef MTS_SSE
43 #define enable_fpexcept_sse()
44 #define query_fpexcept_sse() 0
45 #define disable_fpexcept_sse()
46 #else
47 /* Include SSE intrinsics header file */
48 #include <emmintrin.h>
49 /* MSVC intrinsics header (for RDTSC) */
50 #if defined(__MSVC__)
51 # include <intrin.h>
52 # pragma intrinsic(__rdtsc)
53 #endif
54 
/*
 * Thin convenience wrappers around SSE/SSE2 intrinsics.
 *
 * They stay macros because the shuffle intrinsics need their lane selector
 * to be a compile-time immediate. Every macro parameter is parenthesized at
 * each use, so arguments such as `k + 1` expand with the intended precedence.
 *
 * Caveat: splat_ps expands `ps` twice, splat_ps/splat_epi32 expand `i` four
 * times and mux_ps/mux_epi32 expand `sel` twice -- do not pass expressions
 * with side effects.
 */

/// Replicate lane \c i (0..3) of the packed-float value \c ps into all four lanes
#define splat_ps(ps, i)          _mm_shuffle_ps((ps), (ps), (((i) << 6) | ((i) << 4) | ((i) << 2) | (i)))
/// Replicate lane \c i (0..3) of the packed 32-bit integer value \c ps into all four lanes
#define splat_epi32(ps, i)       _mm_shuffle_epi32((ps), (((i) << 6) | ((i) << 4) | ((i) << 2) | (i)))
/// Bitwise select: bits of \c op1 where \c sel is set, bits of \c op2 elsewhere
#define mux_ps(sel, op1, op2)    _mm_or_ps(_mm_and_ps((sel), (op1)), _mm_andnot_ps((sel), (op2)))
/// Integer counterpart of \ref mux_ps
#define mux_epi32(sel, op1, op2) _mm_or_si128(_mm_and_si128((sel), (op1)), _mm_andnot_si128((sel), (op2)))
/// Clear the INVALID and DIV_ZERO bits of the SSE exception mask (i.e. unmask both exceptions)
#define enable_fpexcept_sse()    _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~(_MM_MASK_INVALID | _MM_MASK_DIV_ZERO))
/// Nonzero if the INVALID or the DIV_ZERO exception is currently unmasked
#define query_fpexcept_sse()     (~_MM_GET_EXCEPTION_MASK() & (_MM_MASK_INVALID | _MM_MASK_DIV_ZERO))
/// Set the INVALID and DIV_ZERO bits of the SSE exception mask (i.e. mask both exceptions)
#define disable_fpexcept_sse()   _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() | (_MM_MASK_INVALID | _MM_MASK_DIV_ZERO))
/// Broadcast the 32-bit integer \c i to all four lanes
#define load1_epi32(i)           _mm_shuffle_epi32(_mm_cvtsi32_si128(i), 0)
/// Flip the sign bit of every lane by XOR-ing with SSEConstants::negation_mask
#define negate_ps(val)           _mm_xor_ps((val), SSEConstants::negation_mask.ps)

/// Reinterpret packed floats as packed integers (cast intrinsic, no value conversion)
#define pstoepi32(ps)            _mm_castps_si128(ps)
/// Reinterpret packed integers as packed floats (cast intrinsic, no value conversion)
#define epi32tops(pi)            _mm_castsi128_ps(pi)
67 
68 #ifndef SINGLE_PRECISION
69 #error SSE2 only supported with single precision
70 #endif
71 
73 
74 /**
75  * \headerfile mitsuba/core/sse.h mitsuba/mitsuba.h
76  * \brief SSE 4-vector and useful aliases
 *
 * All members of this union overlay the same storage, so one value can be
 * accessed as an SSE register (\c ps, \c pi), as a 4-element array
 * (\c f, \c i, \c ui) or through per-lane names (\c f0..f3, \c i0..i3,
 * \c ui0..ui3).
 *
 * The single-argument constructors store the same value in all four lanes;
 * the four-argument constructors store one value per lane. All constructors
 * are \c explicit, hence no implicit conversions take place.
 *
 * NOTE(review): the per-lane names come from anonymous structs, which are a
 * compiler extension in C++, and reading a member other than the one last
 * written relies on the compiler tolerating union type punning.
77  */
78 union SSEVector {
79  __m128 ps; ///< View as four packed single precision floats
80  __m128i pi; ///< View as a packed integer register
81  float f[4]; ///< Lane-wise float access
82  int32_t i[4]; ///< Lane-wise signed 32-bit access
83  uint32_t ui[4]; ///< Lane-wise unsigned 32-bit access
84  struct { float f0,f1,f2,f3; };
85  struct { int32_t i0,i1,i2,i3; };
86  struct { uint32_t ui0,ui1,ui2,ui3; };
87 
88  inline SSEVector() { // no member is initialized here
89  }
90 
91  explicit SSEVector(__m128 ps) // wrap an existing SSE register
92  : ps(ps) {
93  }
94 
95  explicit SSEVector(float f0, float f1, float f2, float f3) // one float per lane
96  : f0(f0), f1(f1), f2(f2), f3(f3) {
97  }
98 
99  explicit SSEVector(float f) : f0(f), f1(f), f2(f), f3(f) {} // same float in every lane
100 
101  explicit SSEVector(int32_t i0, int32_t i1, int32_t i2, int32_t i3) // one signed integer per lane
102  : i0(i0), i1(i1), i2(i2), i3(i3) {
103  }
104 
105  explicit SSEVector(int32_t i) : i0(i), i1(i), i2(i), i3(i) {} // same signed integer in every lane
106 
107  explicit SSEVector(uint32_t ui0, uint32_t ui1, uint32_t ui2, uint32_t ui3) // one unsigned integer per lane
108  : ui0(ui0), ui1(ui1), ui2(ui2), ui3(ui3) {
109  }
110 
111  explicit SSEVector(uint32_t ui) : ui0(ui), ui1(ui), ui2(ui), ui3(ui) {} // same unsigned integer in every lane
112 
113  inline SSEVector &operator=(const SSEVector &vec) {
114  ps = vec.ps; // copying the register view copies the storage shared by all members
115  return *this;
116  }
117 };
118 
119 /**
120  * \brief Some useful constant values for use with SSE
121  * \headerfile mitsuba/core/sse.h mitsuba/mitsuba.h
 *
 * Every constant is a 16-byte aligned \ref SSEVector. The members are only
 * declared here; their definitions (and thus the exact value of \c eps) are
 * not part of this header. The comment on each member lists the intended
 * content of its four lanes.
122  */
123 class MTS_EXPORT_CORE SSEConstants {
124 public:
125  /// (0, 0, 0, 0)
126  static const MM_ALIGN16 SSEVector zero;
127  /// (1, 1, 1, 1)
128  static const MM_ALIGN16 SSEVector one;
129  /// (flt_max, flt_max, flt_max, flt_max)
130  static const MM_ALIGN16 SSEVector max;
131  /// (eps, eps, eps, eps)
132  static const MM_ALIGN16 SSEVector eps;
133  /// (1+eps, 1+eps, 1+eps, 1+eps)
134  static const MM_ALIGN16 SSEVector op_eps;
135  /// (1-eps, 1-eps, 1-eps, 1-eps)
136  static const MM_ALIGN16 SSEVector om_eps;
137  /// (+inf, +inf, +inf, +inf)
138  static const MM_ALIGN16 SSEVector p_inf;
139  /// (-inf, -inf, -inf, -inf)
140  static const MM_ALIGN16 SSEVector n_inf;
141  /// (0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), i.e. all bits set
142  static const MM_ALIGN16 SSEVector ffffffff;
143  /// (0x80000000, 0x80000000, 0x80000000, 0x80000000), i.e. the top bit of each lane; XOR-ed in by negate_ps
144  static const MM_ALIGN16 SSEVector negation_mask;
145 };
146 
147 /** Four 3D vectors as SoA (structure of arrays): an array of three
 * SSEVector values. Presumably element k holds coordinate k of all four
 * vectors, one vector per lane -- confirm against the call sites. */
148 typedef SSEVector QuadVector[3];
149 
150 /// Print an SSE single precision 4-tuple for debugging
151 inline void _mm_debug_ps(const char *desc, __m128 value) {
152  float dest[4];
153  _mm_storeu_ps(dest, value);
154  printf("%s: [%f, %f, %f, %f]\n", desc, dest[0], dest[1], dest[2], dest[3]);
155 }
156 
158 
159 #endif /* MTS_SSE */
160 
161 #endif /* __MITSUBA_CORE_SSE_H_ */
#define MTS_EXPORT_CORE
Definition: getopt.h:29
#define MTS_NAMESPACE_BEGIN
Definition: platform.h:137
#define MTS_NAMESPACE_END
Definition: platform.h:138