Mitsuba Renderer  0.5.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ssevector.h
Go to the documentation of this file.
1 /*
2  This file is part of Mitsuba, a physically based rendering system.
3 
4  Copyright (c) 2007-2014 by Wenzel Jakob and others.
5 
6  Mitsuba is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License Version 3
8  as published by the Free Software Foundation.
9 
10  Mitsuba is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 /*============================================================================
20  HDRITools - High Dynamic Range Image Tools
21  Copyright 2008-2012 Program of Computer Graphics, Cornell University
22 
23  Permission is hereby granted, free of charge, to any person obtaining a copy
24  of this software and associated documentation files (the "Software"), to deal
25  in the Software without restriction, including without limitation the rights
26  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27  copies of the Software, and to permit persons to whom the Software is
28  furnished to do so, subject to the following conditions:
29 
30  The above copyright notice and this permission notice shall be included in
31  all copies or substantial portions of the Software.
32 
33  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39  THE SOFTWARE.
40  -----------------------------------------------------------------------------
41  Primary author:
42  Edgar Velazquez-Armendariz <cs#cornell#edu - eva5>
43 ============================================================================*/
44 
45 #pragma once
46 #if !defined(__MITSUBA_CORE_SSEVECTOR_H_)
47 #define __MITSUBA_CORE_SSEVECTOR_H_
48 
49 #include <mitsuba/core/platform.h>
50 #include <mitsuba/core/sse.h>
51 
52 #if !MTS_SSE
53 # error "This header requires SSE support"
54 #endif
55 
57 
58 namespace math
59 {
60 
61 // Forward declarations, required by Clang and ICL 12.1
62 struct SSEVector4f;
63 struct SSEVector4i;
64 
65 template <int idx3, int idx2, int idx1, int idx0>
66 SSEVector4f shuffle(const SSEVector4f& low, const SSEVector4f& hi);
67 
68 template <int idx3, int idx2, int idx1, int idx0>
69 SSEVector4f shuffle(const SSEVector4f& a);
70 
71 
73 {
74 private:
75  __m128 xmm;
76 
77 public:
79  SSEVector4f(const SSEVector4f& other) : xmm(other.xmm) {}
80  SSEVector4f(__m128 val) : xmm(val) {}
81  explicit SSEVector4f(float val) : xmm(_mm_set1_ps(val)) {}
/// Builds a vector from four floats by forwarding them, in the same order,
/// to \c _mm_set_ps: \c f3 becomes the highest element and \c f0 the lowest.
82  SSEVector4f(float f3, float f2, float f1, float f0) :
83  xmm(_mm_set_ps(f3, f2, f1, f0))
84  {}
85 
86  inline SSEVector4f& operator= (float val) {
87  xmm = _mm_set1_ps(val);
88  return *this;
89  }
90 
91  inline static SSEVector4f zero() {
92  return _mm_setzero_ps();
93  }
94 
95  operator __m128() const {
96  return xmm;
97  }
98 
99  friend SSEVector4f operator& (const SSEVector4f& a, const SSEVector4f& b) {
100  return _mm_and_ps(a.xmm, b.xmm);
101  }
102  friend SSEVector4f operator| (const SSEVector4f& a, const SSEVector4f& b) {
103  return _mm_or_ps(a.xmm, b.xmm);
104  }
105  friend SSEVector4f operator^ (const SSEVector4f& a, const SSEVector4f& b) {
106  return _mm_xor_ps(a.xmm, b.xmm);
107  }
108  /// ~a & b
109  friend SSEVector4f andnot(const SSEVector4f& a, const SSEVector4f& b) {
110  return _mm_andnot_ps(a.xmm, b.xmm);
111  }
112 
113  SSEVector4f& operator&= (const SSEVector4f& a) {
114  xmm = _mm_and_ps(xmm, a.xmm);
115  return *this;
116  }
117  SSEVector4f& operator|= (const SSEVector4f& a) {
118  xmm = _mm_or_ps(xmm, a.xmm);
119  return *this;
120  }
121  SSEVector4f& operator^= (const SSEVector4f& a) {
122  xmm = _mm_xor_ps(xmm, a.xmm);
123  return *this;
124  }
125 
126  friend SSEVector4f operator+ (const SSEVector4f& a, const SSEVector4f& b) {
127  return _mm_add_ps(a.xmm, b.xmm);
128  }
129  friend SSEVector4f operator- (const SSEVector4f& a, const SSEVector4f& b) {
130  return _mm_sub_ps(a.xmm, b.xmm);
131  }
132  friend SSEVector4f operator* (const SSEVector4f& a, const SSEVector4f& b) {
133  return _mm_mul_ps(a.xmm, b.xmm);
134  }
135  friend SSEVector4f operator/ (const SSEVector4f& a, const SSEVector4f& b) {
136  return _mm_div_ps(a.xmm, b.xmm);
137  }
138 
139  SSEVector4f& operator+= (const SSEVector4f& a) {
140  xmm = _mm_add_ps(xmm, a.xmm);
141  return *this;
142  }
143  SSEVector4f& operator-= (const SSEVector4f& a) {
144  xmm = _mm_sub_ps(xmm, a.xmm);
145  return *this;
146  }
147  SSEVector4f& operator*= (const SSEVector4f& a) {
148  xmm = _mm_mul_ps(xmm, a.xmm);
149  return *this;
150  }
151  SSEVector4f& operator/= (const SSEVector4f& a) {
152  xmm = _mm_div_ps(xmm, a.xmm);
153  return *this;
154  }
155 
156  /**
157  * \brief Newton-Raphson Reciprocal:
158  * \f[ 2 * rcp(x) - (x * rcp(x) * rcp(x)) \f]
 *
 * Starts from the \c _mm_rcp_ps estimate and applies one Newton-Raphson
 * refinement step to every element.
 * NOTE(review): for an element that is zero or infinite the product
 * <tt>x * rcp(x)</tt> presumably evaluates to NaN, so the refined value
 * would be NaN rather than what rcp() returns -- confirm before relying
 * on this function for such inputs.
159  */
160  friend inline SSEVector4f rcp_nr(const SSEVector4f& v) {
161  __m128 x0 = _mm_rcp_ps(v.xmm); // initial reciprocal estimate
162  return _mm_sub_ps(_mm_add_ps(x0,x0),
163  _mm_mul_ps(_mm_mul_ps(x0,v.xmm), x0));
164  }
165 
/// Element-wise reciprocal estimate as returned by \c _mm_rcp_ps, without
/// any refinement step (see rcp_nr() for the refined variant)
166  friend inline SSEVector4f rcp(const SSEVector4f& v) {
167  return _mm_rcp_ps(v.xmm);
168  }
169 
/// Element-wise minimum of \c a and \c b (\c _mm_min_ps)
170  friend SSEVector4f min(const SSEVector4f& a, const SSEVector4f& b) {
171  return _mm_min_ps(a.xmm, b.xmm);
172  }
/// Element-wise maximum of \c a and \c b (\c _mm_max_ps)
173  friend SSEVector4f max(const SSEVector4f& a, const SSEVector4f& b) {
174  return _mm_max_ps(a.xmm, b.xmm);
175  }
176 
/// Element-wise NaN test: compares \c a with itself using the "unordered"
/// comparison, so the result is a mask vector (not a \c bool) whose elements
/// are set where the corresponding element of \c a is NaN
177  friend SSEVector4f isnan(const SSEVector4f& a) {
178  return _mm_cmpunord_ps(a.xmm, a.xmm);
179  }
/// Element-wise "unordered" comparison of \c a and \c b: the result is a
/// mask vector whose elements are set where the corresponding element of
/// \c a or of \c b is NaN
180  friend SSEVector4f isnan(const SSEVector4f& a, const SSEVector4f& b) {
181  return _mm_cmpunord_ps(a.xmm, b.xmm);
182  }
183 
184  /**
185  * \brief Moves two of the values of \c low into the low 64-bits
186  * of the result (chosen by \c idx1 and \c idx0), and two of the values
187  * of \c hi into the high 64-bits of the result (chosen by \c idx3 and
188  * \c idx2). Each index in the template is an index in the range [0,3]
189  * to choose a value from the source, 0 being the lowest and 3 the highest.
190  */
191  template <int idx3, int idx2, int idx1, int idx0>
192  friend SSEVector4f shuffle(const SSEVector4f& low, const SSEVector4f& hi) {
193  return _mm_shuffle_ps(low.xmm,hi.xmm,_MM_SHUFFLE(idx3,idx2,idx1,idx0));
194  }
195 
196  /// Shuffles the elements of the given vector using the indices [0,3]
/// The same vector is passed as both operands of \c _mm_shuffle_ps, so
/// result element 3 is <tt>a[idx3]</tt>, element 2 is <tt>a[idx2]</tt>,
/// element 1 is <tt>a[idx1]</tt> and element 0 is <tt>a[idx0]</tt>.
197  template <int idx3, int idx2, int idx1, int idx0>
198  friend SSEVector4f shuffle(const SSEVector4f& a) {
199  return _mm_shuffle_ps(a.xmm, a.xmm, _MM_SHUFFLE(idx3,idx2,idx1,idx0));
200  }
201 
202  /// a == b
203  friend SSEVector4f cmpeq(const SSEVector4f& a, const SSEVector4f& b) {
204  return _mm_cmpeq_ps(a.xmm, b.xmm);
205  }
206  /// a < b
207  friend SSEVector4f cmplt(const SSEVector4f& a, const SSEVector4f& b) {
208  return _mm_cmplt_ps(a.xmm, b.xmm);
209  }
210  /// a <= b
211  friend SSEVector4f cmple(const SSEVector4f& a, const SSEVector4f& b) {
212  return _mm_cmple_ps(a.xmm, b.xmm);
213  }
214  /// a > b
215  friend SSEVector4f cmpgt(const SSEVector4f& a, const SSEVector4f& b) {
216  return _mm_cmpgt_ps(a.xmm, b.xmm);
217  }
218  /// a >= b
219  friend SSEVector4f cmpge(const SSEVector4f& a, const SSEVector4f& b) {
220  return _mm_cmpge_ps(a.xmm, b.xmm);
221  }
222  /// a != b
223  friend SSEVector4f cmpneq(const SSEVector4f& a, const SSEVector4f& b) {
224  return _mm_cmpneq_ps(a.xmm, b.xmm);
225  }
226  /// !(a < b)
227  friend SSEVector4f cmpnlt(const SSEVector4f& a, const SSEVector4f& b) {
228  return _mm_cmpnlt_ps(a.xmm, b.xmm);
229  }
230  /// !(a <= b)
231  friend SSEVector4f cmpnle(const SSEVector4f& a, const SSEVector4f& b) {
232  return _mm_cmpnle_ps(a.xmm, b.xmm);
233  }
234  /// !(a > b)
235  friend SSEVector4f cmpngt(const SSEVector4f& a, const SSEVector4f& b) {
236  return _mm_cmpngt_ps(a.xmm, b.xmm);
237  }
238  /// !(a >= b)
239  friend SSEVector4f cmpnge(const SSEVector4f& a, const SSEVector4f& b) {
240  return _mm_cmpnge_ps(a.xmm, b.xmm);
241  }
242 
/// Element-wise <tt>a == b</tt>. Forwards to cmpeq() and therefore returns
/// a per-element mask vector, not a \c bool.
243  friend SSEVector4f operator==(const SSEVector4f& a, const SSEVector4f& b) {
244  return cmpeq(a, b);
245  }
/// Element-wise <tt>a != b</tt>. Forwards to cmpneq() and therefore returns
/// a per-element mask vector, not a \c bool.
246  friend SSEVector4f operator!=(const SSEVector4f& a, const SSEVector4f& b) {
247  return cmpneq(a, b);
248  }
249  friend SSEVector4f operator<(const SSEVector4f& a, const SSEVector4f& b) {
250  return cmplt(a, b);
251  }
252  friend SSEVector4f operator<=(const SSEVector4f& a, const SSEVector4f& b) {
253  return cmple(a, b);
254  }
255  friend SSEVector4f operator>(const SSEVector4f& a, const SSEVector4f& b) {
256  return cmpgt(a, b);
257  }
258  friend SSEVector4f operator>=(const SSEVector4f& a, const SSEVector4f& b) {
259  return cmpge(a, b);
260  }
261 
262  /// Select/blend operation <tt>(mask) ? a : b</tt>
/// The choice is made bit by bit: a result bit comes from \c a where the
/// corresponding \c mask bit is set and from \c b where it is clear, so a
/// whole element is selected only when its mask element is all ones or all
/// zeros (as produced by the comparison functions of this class).
263  friend inline SSEVector4f select(const SSEVector4f& mask,
264  const SSEVector4f& a, const SSEVector4f& b) {
265  // Alternative method by Jim Conyngham/Wikipedia MD5 page, via
266  // http://markplusplus.wordpress.com/2007/03/14/fast-sse-select-operation/ [July 2012]
// Computes b ^ (mask & (a ^ b)): the XOR with b cancels out where mask is 1
267  return _mm_xor_ps(b.xmm, _mm_and_ps(mask.xmm, _mm_xor_ps(a.xmm, b.xmm)));
268  }
269 
270  /// Round \c a towards zero
/// Implemented as a round trip through 32-bit integers: truncating
/// conversion to int, then conversion back to float.
/// NOTE(review): elements whose truncated value does not fit in a signed
/// 32-bit integer (and NaN) presumably do not survive the round trip --
/// confirm the expected input range with callers.
271  friend inline SSEVector4f roundTruncate(const SSEVector4f& a) {
272  __m128i truncated = _mm_cvttps_epi32(a.xmm);
273  return _mm_cvtepi32_ps(truncated);
274  }
275 
276  /// Save to \c dest without polluting the cache
/// Writes \c value through \c _mm_stream_ps, treating \c dest as an array
/// of four floats.
/// NOTE(review): \c _mm_stream_ps is documented to require a 16-byte
/// aligned address -- confirm that callers only pass aligned storage.
277  friend inline void stream(SSEVector4f* dest, const SSEVector4f& value) {
278  _mm_stream_ps(reinterpret_cast<float*>(dest), value.xmm);
279  }
280  /// Save to \c dest without polluting the cache
281  friend inline void stream(__m128* dest, const SSEVector4f& value) {
282  _mm_stream_ps(reinterpret_cast<float*>(dest), value.xmm);
283  }
284  /// Save to \c dest without polluting the cache
285  friend inline void stream(float* dest, const SSEVector4f& value) {
286  _mm_stream_ps(dest, value.xmm);
287  }
288 };
289 
290 
291 
293 {
294 private:
295  __m128i xmm;
296 
297 public:
299  SSEVector4i(const SSEVector4i& val) : xmm(val.xmm) {}
300  SSEVector4i(__m128i val) : xmm(val) {}
301  explicit SSEVector4i(int32_t val) : xmm(_mm_set1_epi32(val)) {}
/// Builds a vector from four 32-bit integers by forwarding them, in the
/// same order, to \c _mm_set_epi32: \c i3 becomes the highest element and
/// \c i0 the lowest.
302  SSEVector4i(int32_t i3, int32_t i2, int32_t i1, int32_t i0) :
303  xmm(_mm_set_epi32(i3, i2, i1, i0))
304  {}
305 
306  SSEVector4i& operator= (int32_t val) {
307  xmm = _mm_set1_epi32(val);
308  return *this;
309  }
310 
311  inline static SSEVector4i zero() {
312  return _mm_setzero_si128();
313  }
314 
315  operator __m128i() const {
316  return xmm;
317  }
318 
319  friend SSEVector4i operator& (const SSEVector4i& a, const SSEVector4i& b) {
320  return _mm_and_si128(a.xmm, b.xmm);
321  }
322  friend SSEVector4i operator| (const SSEVector4i& a, const SSEVector4i& b) {
323  return _mm_or_si128(a.xmm, b.xmm);
324  }
325  friend SSEVector4i operator^ (const SSEVector4i& a, const SSEVector4i& b) {
326  return _mm_xor_si128(a.xmm, b.xmm);
327  }
328  /// ~a & b
329  friend SSEVector4i andnot(const SSEVector4i& a, const SSEVector4i& b) {
330  return _mm_andnot_si128(a.xmm, b.xmm);
331  }
332  SSEVector4i& operator&= (const SSEVector4i& a) {
333  xmm = _mm_and_si128(xmm, a.xmm);
334  return *this;
335  }
336  SSEVector4i& operator|= (const SSEVector4i& a) {
337  xmm = _mm_or_si128(xmm, a.xmm);
338  return *this;
339  }
340  SSEVector4i& operator^= (const SSEVector4i& a) {
341  xmm = _mm_xor_si128(xmm, a.xmm);
342  return *this;
343  }
344 
345  friend SSEVector4i operator+ (const SSEVector4i& a, const SSEVector4i& b) {
346  return _mm_add_epi32(a.xmm, b.xmm);
347  }
348  friend SSEVector4i operator- (const SSEVector4i& a, const SSEVector4i& b) {
349  return _mm_sub_epi32(a.xmm, b.xmm);
350  }
351  SSEVector4i& operator+= (const SSEVector4i& a) {
352  xmm = _mm_add_epi32(xmm, a.xmm);
353  return *this;
354  }
355  SSEVector4i& operator-= (const SSEVector4i& a) {
356  xmm = _mm_sub_epi32(xmm, a.xmm);
357  return *this;
358  }
359 
360  /// Test if all elements are zero
361  inline bool isZero() const {
// Every 32-bit element equal to zero becomes all ones in the mask
362  const __m128i mask = _mm_cmpeq_epi32(xmm, _mm_setzero_si128());
// _mm_movemask_epi8 gathers the top bit of each of the 16 bytes, so the
// value 0xFFFF means that all four elements compared equal to zero
363  return _mm_movemask_epi8(mask) == 0xFFFF;
364  }
365 
366  /// a == b
367  friend SSEVector4i cmpeq(const SSEVector4i& a, const SSEVector4i& b) {
368  return _mm_cmpeq_epi32(a.xmm, b.xmm);
369  }
370  /// a < b
371  friend SSEVector4i cmplt(const SSEVector4i& a, const SSEVector4i& b) {
372  return _mm_cmplt_epi32(a.xmm, b.xmm);
373  }
374  /// a > b
375  friend SSEVector4i cmpgt(const SSEVector4i& a, const SSEVector4i& b) {
376  return _mm_cmpgt_epi32(a.xmm, b.xmm);
377  }
378  friend SSEVector4i operator==(const SSEVector4i& a, const SSEVector4i& b) {
379  return cmpeq(a, b);
380  }
381  friend SSEVector4i operator<(const SSEVector4i& a, const SSEVector4i& b) {
382  return cmplt(a, b);
383  }
384  friend SSEVector4i operator>(const SSEVector4i& a, const SSEVector4i& b) {
385  return cmpgt(a, b);
386  }
387 
388  /// Select/blend: <tt>(mask) ? a : b</tt>
/// The choice is made bit by bit: a result bit comes from \c a where the
/// corresponding \c mask bit is set and from \c b where it is clear, so a
/// whole element is selected only when its mask element is all ones or all
/// zeros (as produced by the comparison functions of this class).
389  friend inline SSEVector4i select(const SSEVector4i& mask,
390  const SSEVector4i& a, const SSEVector4i& b) {
391  // Alternative method by Jim Conyngham/Wikipedia MD5 page, via
392  // http://markplusplus.wordpress.com/2007/03/14/fast-sse-select-operation/ [July 2012]
// Computes b ^ (mask & (a ^ b)): the XOR with b cancels out where mask is 1
393  return _mm_xor_si128(b.xmm,
394  _mm_and_si128(mask.xmm, _mm_xor_si128(a.xmm, b.xmm)));
395  }
396 
/// Returns a reference to a function-local static constant holding the four
/// template arguments. They are stored through the \c int32_t view of the
/// union in the order \c i0, \c i1, \c i2, \c i3 (so \c i0 is
/// <tt>i32[0]</tt>) and read back through the \c __m128i view.
397  template <int32_t i3, int32_t i2, int32_t i1, int32_t i0>
398  static const __m128i& constant() {
399  static const union {
400  int32_t i32[4];
401  __m128i xmm;
402  } u = {{i0, i1, i2, i3}};
403  return u.xmm;
404  }
405 
/// Returns a reference to a function-local static constant whose four
/// 32-bit elements are all equal to the template argument \c value.
406  template <int32_t value>
407  static const __m128i& constant() {
408  static const union {
409  int32_t i32[4];
410  __m128i xmm;
411  } u = {{value, value, value, value}};
412  return u.xmm;
413  }
414 
415  /// Shift right by \c count bits while shifting in zeros
416  friend inline SSEVector4i srl(const SSEVector4i& a, int count) {
417  return _mm_srli_epi32(a.xmm, count);
418  }
419 
420  /// Shift left by \c count bits while shifting in zeros
421  friend inline SSEVector4i sll(const SSEVector4i& a, int count) {
422  return _mm_slli_epi32(a.xmm, count);
423  }
424 
425  /// Save to \c dest without polluting the cache
426  friend inline void stream(SSEVector4i* dest, const SSEVector4i& value) {
427  _mm_stream_si128(&(dest->xmm), value);
428  }
429  /// Save to \c dest without polluting the cache
430  friend inline void stream(__m128i* dest, const SSEVector4i& value) {
431  _mm_stream_si128(dest, value);
432  }
433 };
434 
435 /// Reinterprets \c a as a \c SSEVector4i
437  return _mm_castps_si128(a);
438 }
439 /// Convert \c a to integer using truncate
/// Each element is converted to a 32-bit integer with
/// \c _mm_cvttps_epi32, i.e. the fractional part is discarded.
440 inline SSEVector4i toInt(const SSEVector4f& a) {
441  return _mm_cvttps_epi32(a);
442 }
443 /// Converts \c a to integer using round
445  return _mm_cvtps_epi32(a);
446 }
447 
448 /// Reinterprets \c a as a \c SSEVector4f
450  return _mm_castsi128_ps(a);
451 }
452 /// Convert \c a to floating point
/// Each 32-bit integer element is converted to a single-precision float
/// with \c _mm_cvtepi32_ps (a value conversion, not a bit reinterpretation).
453 inline SSEVector4f toFloat(const SSEVector4i& a) {
454  return _mm_cvtepi32_ps(a);
455 }
456 
457 /**
458  * \brief The arguments <tt>row0</tt>, <tt>row1</tt>, <tt>row2</tt> and
459  * <tt>row3</tt> are \c SSEVector4f values whose elements form the corresponding
460  * rows of a 4-by-4 matrix. The matrix transposition is returned in
461  * arguments <tt>row0</tt>, <tt>row1</tt>, <tt>row2</tt> and <tt>row3</tt>
462  * where \c row0 now holds column 0 of the original matrix, \c row1 now
463  * holds column 1 of the original matrix, and so on.
 * The transposition is done in place: all four arguments are overwritten.
464  * \author Intel Intrinsics Guide for AVX2
465  */
466 FINLINE void transpose(SSEVector4f& row0, SSEVector4f& row1,
467  SSEVector4f& row2, SSEVector4f& row3) {
468  __m128 tmp3, tmp2, tmp1, tmp0;
// Step 1: interleave pairs of rows (elements listed lowest first)
469  tmp0 = _mm_unpacklo_ps(row0, row1); // r0[0], r1[0], r0[1], r1[1]
470  tmp2 = _mm_unpacklo_ps(row2, row3); // r2[0], r3[0], r2[1], r3[1]
471  tmp1 = _mm_unpackhi_ps(row0, row1); // r0[2], r1[2], r0[3], r1[3]
472  tmp3 = _mm_unpackhi_ps(row2, row3); // r2[2], r3[2], r2[3], r3[3]
473 
// Step 2: combine the 64-bit halves so that each output holds one column
474  row0 = _mm_movelh_ps(tmp0, tmp2); // r0[0], r1[0], r2[0], r3[0]
475  row1 = _mm_movehl_ps(tmp2, tmp0); // r0[1], r1[1], r2[1], r3[1]
476  row2 = _mm_movelh_ps(tmp1, tmp3); // r0[2], r1[2], r2[2], r3[2]
477  row3 = _mm_movehl_ps(tmp3, tmp1); // r0[3], r1[3], r2[3], r3[3]
478 }
479 
480 } // namespace math
481 
483 
484 #endif /* __MITSUBA_CORE_SSEVECTOR_H_ */
SSEVector4i(int32_t i3, int32_t i2, int32_t i1, int32_t i0)
Definition: ssevector.h:302
SSEVector4i(int32_t val)
Definition: ssevector.h:301
friend SSEVector4f cmple(const SSEVector4f &a, const SSEVector4f &b)
a <= b
Definition: ssevector.h:211
SSEVector4i castAsInt(const SSEVector4f &a)
Reinterprets a as a SSEVector4i.
Definition: ssevector.h:436
friend SSEVector4f select(const SSEVector4f &mask, const SSEVector4f &a, const SSEVector4f &b)
Select/blend operation (mask) ? a : b
Definition: ssevector.h:263
bool isZero() const
Test if all elements are zero.
Definition: ssevector.h:361
SSEVector4f castAsFloat(const SSEVector4i &a)
Reinterprets a as a SSEVector4f.
Definition: ssevector.h:449
friend SSEVector4f max(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:173
Matrix< M1, N2, T > operator*(const Matrix< M1, N1, T > &mat1, const Matrix< M2, N2, T > &mat2)
Matrix multiplication (creates a temporary)
Definition: matrix.h:745
friend SSEVector4f cmpnlt(const SSEVector4f &a, const SSEVector4f &b)
!(a < b)
Definition: ssevector.h:227
SSEVector4i()
Definition: ssevector.h:298
friend void stream(SSEVector4i *dest, const SSEVector4i &value)
Save to dest without polluting the cache.
Definition: ssevector.h:426
friend SSEVector4i select(const SSEVector4i &mask, const SSEVector4i &a, const SSEVector4i &b)
Select/blend: (mask) ? a : b
Definition: ssevector.h:389
friend SSEVector4i cmpeq(const SSEVector4i &a, const SSEVector4i &b)
a == b
Definition: ssevector.h:367
friend SSEVector4i operator>(const SSEVector4i &a, const SSEVector4i &b)
Definition: ssevector.h:384
SSEVector4f(float val)
Definition: ssevector.h:81
friend SSEVector4f cmpnle(const SSEVector4f &a, const SSEVector4f &b)
!(a <= b)
Definition: ssevector.h:231
Definition: ssevector.h:72
friend SSEVector4f operator<(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:249
SSEVector4i(__m128i val)
Definition: ssevector.h:300
friend void stream(__m128 *dest, const SSEVector4f &value)
Save to dest without polluting the cache.
Definition: ssevector.h:281
friend SSEVector4f cmpge(const SSEVector4f &a, const SSEVector4f &b)
a >= b
Definition: ssevector.h:219
Definition: ssevector.h:292
static const __m128i & constant()
Definition: ssevector.h:398
friend SSEVector4f operator==(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:243
#define MTS_NAMESPACE_BEGIN
Definition: platform.h:137
friend SSEVector4i operator==(const SSEVector4i &a, const SSEVector4i &b)
Definition: ssevector.h:378
FINLINE void transpose(SSEVector4f &row0, SSEVector4f &row1, SSEVector4f &row2, SSEVector4f &row3)
The arguments row0, row1, row2 and row3 are __m128 values whose elements form the corresponding rows ...
Definition: ssevector.h:466
SSEVector4f toFloat(const SSEVector4i &a)
Convert a to floating point.
Definition: ssevector.h:453
friend SSEVector4f rcp(const SSEVector4f &v)
Definition: ssevector.h:166
friend SSEVector4f cmpeq(const SSEVector4f &a, const SSEVector4f &b)
a == b
Definition: ssevector.h:203
friend SSEVector4f operator!=(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:246
friend SSEVector4f rcp_nr(const SSEVector4f &v)
Newton-Raphson Reciprocal: 2 * rcp(x) - (x * rcp(x) * rcp(x)).
Definition: ssevector.h:160
friend SSEVector4f cmplt(const SSEVector4f &a, const SSEVector4f &b)
a < b
Definition: ssevector.h:207
friend SSEVector4f operator>(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:255
SSEVector4i(const SSEVector4i &val)
Definition: ssevector.h:299
friend SSEVector4f min(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:170
friend SSEVector4f cmpnge(const SSEVector4f &a, const SSEVector4f &b)
!(a >= b)
Definition: ssevector.h:239
SSEVector4f shuffle(const SSEVector4f &a)
Definition: ssevector.h:198
friend void stream(SSEVector4f *dest, const SSEVector4f &value)
Save to dest without polluting the cache.
Definition: ssevector.h:277
SSEVector4i toInt(const SSEVector4f &a)
Convert a to integer using truncate.
Definition: ssevector.h:440
friend SSEVector4f cmpgt(const SSEVector4f &a, const SSEVector4f &b)
a > b
Definition: ssevector.h:215
SSEVector4f(const SSEVector4f &other)
Definition: ssevector.h:79
friend SSEVector4f roundTruncate(const SSEVector4f &a)
Round a towards zero.
Definition: ssevector.h:271
friend SSEVector4f cmpneq(const SSEVector4f &a, const SSEVector4f &b)
a != b
Definition: ssevector.h:223
SSEVector4f()
Definition: ssevector.h:78
friend SSEVector4f andnot(const SSEVector4f &a, const SSEVector4f &b)
~a & b
Definition: ssevector.h:109
friend SSEVector4i cmplt(const SSEVector4i &a, const SSEVector4i &b)
a < b
Definition: ssevector.h:371
friend SSEVector4f operator<=(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:252
SSEVector4f(__m128 val)
Definition: ssevector.h:80
friend SSEVector4i operator<(const SSEVector4i &a, const SSEVector4i &b)
Definition: ssevector.h:381
friend SSEVector4i srl(const SSEVector4i &a, int count)
Shift right by count bits while shifting in zeros.
Definition: ssevector.h:416
static const __m128i & constant()
Definition: ssevector.h:407
friend SSEVector4i andnot(const SSEVector4i &a, const SSEVector4i &b)
~a & b
Definition: ssevector.h:329
static SSEVector4f zero()
Definition: ssevector.h:91
friend SSEVector4i cmpgt(const SSEVector4i &a, const SSEVector4i &b)
a > b
Definition: ssevector.h:375
friend SSEVector4f isnan(const SSEVector4f &a)
Definition: ssevector.h:177
friend SSEVector4f isnan(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:180
int roundToInt(float value)
Integer round function (single precision)
Definition: math.h:106
friend void stream(__m128i *dest, const SSEVector4i &value)
Save to dest without polluting the cache.
Definition: ssevector.h:430
friend SSEVector4f cmpngt(const SSEVector4f &a, const SSEVector4f &b)
!(a > b)
Definition: ssevector.h:235
friend SSEVector4i sll(const SSEVector4i &a, int count)
Shift left by count bits while shifting in zeros.
Definition: ssevector.h:421
#define MTS_NAMESPACE_END
Definition: platform.h:138
friend SSEVector4f operator>=(const SSEVector4f &a, const SSEVector4f &b)
Definition: ssevector.h:258
static SSEVector4i zero()
Definition: ssevector.h:311
friend void stream(float *dest, const SSEVector4f &value)
Save to dest without polluting the cache.
Definition: ssevector.h:285
SSEVector4f(float f3, float f2, float f1, float f0)
Definition: ssevector.h:82