20 #if !defined(__MITSUBA_RENDER_TRIACCEL_SSE_H_)
21 #define __MITSUBA_RENDER_TRIACCEL_SSE_H_
28 __m128 mint, __m128 maxt, __m128 inactive,
Intersection4 &its) {
29 static const MM_ALIGN16
int waldModulo[4] = { 1, 2, 0, 1 };
30 const int ku = waldModulo[tri.
k], kv = waldModulo[tri.
k+1];
34 o_u = packet.
o[ku].ps, o_v = packet.
o[kv].ps, o_k = packet.
o[tri.
k].ps,
35 d_u = packet.
d[ku].ps, d_v = packet.
d[kv].ps, d_k = packet.
d[tri.
k].ps;
39 line1 = _mm_load_ps((
const float *) &tri),
40 n_u = splat_ps(line1, 1),
41 n_v = splat_ps(line1, 2),
42 n_d = splat_ps(line1, 3);
45 ounu = _mm_mul_ps(o_u, n_u),
46 ovnv = _mm_mul_ps(o_v, n_v),
47 dunu = _mm_mul_ps(d_u, n_u),
48 dvnv = _mm_mul_ps(d_v, n_v);
52 num = _mm_sub_ps(_mm_sub_ps(_mm_sub_ps(n_d, ounu), ovnv), o_k),
53 denom = _mm_add_ps(_mm_add_ps(dunu, dvnv), d_k);
56 t = _mm_div_ps(num, denom);
59 _mm_andnot_ps(inactive, _mm_and_ps(_mm_cmpgt_ps(maxt, t), _mm_cmpgt_ps(t, mint)));
61 if (_mm_movemask_ps(hasIts) == 0)
66 line2 = _mm_load_ps(&tri.
a_u),
67 a_u = splat_ps(line2, 0),
68 a_v = splat_ps(line2, 1),
69 b_nu = splat_ps(line2, 2),
70 b_nv = splat_ps(line2, 3);
73 hu = _mm_add_ps(o_u, _mm_sub_ps(_mm_mul_ps(t, d_u), a_u)),
74 hv = _mm_add_ps(o_v, _mm_sub_ps(_mm_mul_ps(t, d_v), a_v));
78 line3 = _mm_load_ps(&tri.
c_nu),
79 c_nu = splat_ps(line3, 0),
80 c_nv = splat_ps(line3, 1);
82 primIndex = splat_epi32(pstoepi32(line3), 3),
83 shapeIndex = splat_epi32(pstoepi32(line3), 2);
86 u = _mm_add_ps(_mm_mul_ps(hv, b_nu), _mm_mul_ps(hu, b_nv)),
87 v = _mm_add_ps(_mm_mul_ps(hu, c_nu), _mm_mul_ps(hv, c_nv));
90 zero = _mm_setzero_ps(),
91 term1 = _mm_cmpge_ps(u, zero),
92 term2 = _mm_cmpge_ps(v, zero),
93 term3 = _mm_add_ps(u, v);
96 term4 = _mm_and_ps(term1, term2),
97 term5 = _mm_cmpge_ps(SSEConstants::one.ps, term3);
99 hasIts = _mm_and_ps(hasIts, _mm_and_ps(term4, term5));
101 if (_mm_movemask_ps(hasIts) == 0)
104 its.
t.ps = mux_ps(hasIts, t, its.
t.ps);
105 its.
u.ps = mux_ps(hasIts, u, its.
u.ps);
106 its.
v.ps = mux_ps(hasIts, v, its.
v.ps);
FINLINE __m128 rayIntersectPacket(const TriAccel &tri, const RayPacket4 &packet, __m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its)
Definition: triaccel_sse.h:27
SSEVector t
Definition: ray_sse.h:75
SSEVector u
Definition: ray_sse.h:76
Float a_u
Definition: triaccel.h:43
SSEVector v
Definition: ray_sse.h:77
SIMD quad-packed ray for coherent ray tracing.
Definition: ray_sse.h:34
SSEVector shapeIndex
Definition: ray_sse.h:79
QuadVector o
Definition: ray_sse.h:35
SSEVector primIndex
Definition: ray_sse.h:78
QuadVector d
Definition: ray_sse.h:35
uint32_t k
Definition: triaccel.h:38
Pre-computed triangle representation based on Ingo Wald's TriAccel layout.
Definition: triaccel.h:37
Float c_nu
Definition: triaccel.h:48