Mitsuba Renderer  0.5.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
gkdtree.h
Go to the documentation of this file.
1 /*
2  This file is part of Mitsuba, a physically based rendering system.
3 
4  Copyright (c) 2007-2014 by Wenzel Jakob and others.
5 
6  Mitsuba is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License Version 3
8  as published by the Free Software Foundation.
9 
10  Mitsuba is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #pragma once
20 #if !defined(__MITSUBA_RENDER_GKDTREE_H_)
21 #define __MITSUBA_RENDER_GKDTREE_H_
22 
23 #include <mitsuba/core/timer.h>
24 #include <mitsuba/core/lock.h>
25 #include <boost/static_assert.hpp>
26 #include <stack>
27 
28 #if defined(__LINUX__)
29 #include <malloc.h>
30 #endif
31 
32 /// Activate lots of extra checks
33 //#define MTS_KD_DEBUG 1
34 
35 /** Compile-time KD-tree depth limit. Allows to put certain
36  data structures on the stack */
37 #define MTS_KD_MAXDEPTH 48
38 
39 /// OrderedChunkAllocator: don't create chunks smaller than 512 KiB
40 #define MTS_KD_MIN_ALLOC 512*1024
41 
42 /// Allocate nodes & index lists in blocks of 512 KiB
43 #define MTS_KD_BLOCKSIZE_KD (512*1024/sizeof(KDNode))
44 #define MTS_KD_BLOCKSIZE_IDX (512*1024/sizeof(uint32_t))
45 
46 /**
47  * \brief To avoid numerical issues, the size of the scene
48  * bounding box is increased by this amount
49  */
50 #define MTS_KD_AABB_EPSILON 1e-3f
51 
52 #if defined(MTS_KD_DEBUG)
53 #define KDAssert(expr) SAssert(expr)
54 #define KDAssertEx(expr, text) SAssertEx(expr, text)
55 #else
56 #define KDAssert(expr)
57 #define KDAssertEx(expr, text)
58 #endif
59 
61 
62 
63 /**
64  * \brief Special "ordered" memory allocator
65  *
66  * During kd-tree construction, large amounts of memory are required
67  * to temporarily hold index and edge event lists. When not implemented
68  * properly, these allocations can become a critical bottleneck.
69  * The class \ref OrderedChunkAllocator provides a specialized
70  * memory allocator, which reserves memory in chunks of at least
71  * 512KiB (cf. MTS_KD_MIN_ALLOC). An important assumption made by the allocator is that
72  * memory will be released in the exact same order, in which it was
73  * previously allocated. This makes it possible to create an
74  * implementation with a very low memory overhead. Note that no locking
75  * is done, hence each thread will need its own allocator.
76  *
77  * \author Wenzel Jakob
78  */
80 public:
81  inline OrderedChunkAllocator(size_t minAllocation = MTS_KD_MIN_ALLOC)
82  : m_minAllocation(minAllocation) {
83  m_chunks.reserve(16);
84  }
85 
87  cleanup();
88  }
89 
90  /**
91  * \brief Release all memory used by the allocator
92  */
93  void cleanup() {
94  for (std::vector<Chunk>::iterator it = m_chunks.begin();
95  it != m_chunks.end(); ++it)
96  freeAligned((*it).start);
97  m_chunks.clear();
98  }
99 
100  /**
101  * \brief Merge the chunks of another allocator into this one
102  */
103  void merge(const OrderedChunkAllocator &other) {
104  m_chunks.reserve(m_chunks.size() + other.m_chunks.size());
105  m_chunks.insert(m_chunks.end(), other.m_chunks.begin(),
106  other.m_chunks.end());
107  }
108 
109  /**
110  * \brief Forget about all chunks without actually freeing them.
111  * This is useful when the chunks have been merged into another
112  * allocator.
113  */
114  void forget() {
115  m_chunks.clear();
116  }
117 
118  /**
119  * \brief Request a block of memory from the allocator
120  *
121  * Walks through the list of chunks to find one with enough
122  * free memory. If no chunk could be found, a new one is created.
123  */
124  template <typename T> T * __restrict allocate(size_t size) {
125  size *= sizeof(T);
126  for (std::vector<Chunk>::iterator it = m_chunks.begin();
127  it != m_chunks.end(); ++it) {
128  Chunk &chunk = *it;
129  if (chunk.remainder() >= size) {
130  T* result = reinterpret_cast<T *>(chunk.cur);
131  chunk.cur += size;
132  return result;
133  }
134  }
135 
136  /* No chunk had enough free memory */
137  size_t allocSize = std::max(size,
138  m_minAllocation);
139 
140  Chunk chunk;
141  chunk.start = (uint8_t *) allocAligned(allocSize);
142  chunk.cur = chunk.start + size;
143  chunk.size = allocSize;
144  m_chunks.push_back(chunk);
145 
146  return reinterpret_cast<T *>(chunk.start);
147  }
148 
149  template <typename T> void release(T *ptr) {
150  for (std::vector<Chunk>::iterator it = m_chunks.begin();
151  it != m_chunks.end(); ++it) {
152  Chunk &chunk = *it;
153  if ((uint8_t *) ptr >= chunk.start &&
154  (uint8_t *) ptr < chunk.start + chunk.size) {
155  chunk.cur = (uint8_t *) ptr;
156  return;
157  }
158  }
159 #if defined(MTS_KD_DEBUG)
160  /* Uh oh, allocation could not be found. Check if it has size==0 */
161  for (std::vector<Chunk>::iterator it = m_chunks.begin();
162  it != m_chunks.end(); ++it) {
163  const Chunk &chunk = *it;
164  if ((uint8_t *) ptr == chunk.start + chunk.size)
165  return;
166  }
167  SLog(EError, "OrderedChunkAllocator: Internal error while"
168  " releasing memory");
169 #endif
170  }
171 
172  /**
173  * \brief Shrink the size of the last allocated chunk
174  */
175  template <typename T> void shrinkAllocation(T *ptr, size_t newSize) {
176  newSize *= sizeof(T);
177  for (std::vector<Chunk>::iterator it = m_chunks.begin();
178  it != m_chunks.end(); ++it) {
179  Chunk &chunk = *it;
180  if ((uint8_t *) ptr >= chunk.start &&
181  (uint8_t *) ptr < chunk.start + chunk.size) {
182  chunk.cur = (uint8_t *) ptr + newSize;
183  return;
184  }
185  }
186 #if defined(MTS_KD_DEBUG)
187  /* Uh oh, allocation could not be found. Check if it has size == 0 */
188  if (newSize == 0) {
189  for (std::vector<Chunk>::iterator it = m_chunks.begin();
190  it != m_chunks.end(); ++it) {
191  const Chunk &chunk = *it;
192  if ((uint8_t *) ptr == chunk.start + chunk.size)
193  return;
194  }
195  }
196  SLog(EError, "OrderedChunkAllocator: Internal error while"
197  " releasing memory");
198 #endif
199  }
200 
201  inline size_t getChunkCount() const { return m_chunks.size(); }
202 
203  /**
204  * \brief Return the total amount of chunk memory in bytes
205  */
206  size_t size() const {
207  size_t result = 0;
208  for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
209  it != m_chunks.end(); ++it)
210  result += (*it).size;
211  return result;
212  }
213 
214  /**
215  * \brief Return the total amount of used memory in bytes
216  */
217  size_t used() const {
218  size_t result = 0;
219  for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
220  it != m_chunks.end(); ++it)
221  result += (*it).used();
222  return result;
223  }
224 
225  /**
226  * \brief Return a string representation of the chunks
227  */
228  std::string toString() const {
229  std::ostringstream oss;
230  oss << "OrderedChunkAllocator[" << endl;
231  for (size_t i=0; i<m_chunks.size(); ++i)
232  oss << " Chunk " << i << ": " << m_chunks[i].toString() << endl;
233  oss << "]";
234  return oss.str();
235  }
236 
237 private:
238  struct Chunk {
239  size_t size;
240  uint8_t *start, *cur;
241 
242  inline size_t used() const {
243  return cur - start;
244  }
245 
246  inline size_t remainder() const {
247  return size - used();
248  }
249 
250  std::string toString() const {
251  return formatString("0x%llx-0x%llx (size=" SIZE_T_FMT
252  ", used=" SIZE_T_FMT ")", start, start+size,
253  size, used());
254  }
255  };
256 
257  size_t m_minAllocation;
258  std::vector<Chunk> m_chunks;
259 };
260 
261 /**
262  * \brief Basic vector implementation, which stores all data
263  * in a list of fixed-sized blocks.
264  *
265  * This leads to a more conservative memory usage when the
266  * final size of a (possibly very large) growing vector is
267  * unknown. Also, frequent reallocations & copies are avoided.
268  *
269  * \author Wenzel Jakob
270  */
271 template <typename T, size_t BlockSize> class BlockedVector {
272 public:
273  BlockedVector() : m_pos(0) {}
274 
276  clear();
277  }
278 
279  /**
280  * \brief Append an element to the end
281  */
282  inline void push_back(const T &value) {
283  size_t blockIdx = m_pos / BlockSize;
284  size_t offset = m_pos % BlockSize;
285  if (blockIdx == m_blocks.size())
286  m_blocks.push_back(new T[BlockSize]);
287  m_blocks[blockIdx][offset] = value;
288  m_pos++;
289  }
290 
291  /**
292  * \brief Allocate a certain number of elements and
293  * return a pointer to the first one.
294  *
295  * The implementation will ensure that they lie
296  * contiguous in memory -- note that this can potentially
297  * create unused elements in the previous block if a new
298  * one has to be allocated.
299  */
300  inline T * __restrict allocate(size_t size) {
301 #if defined(MTS_KD_DEBUG)
302  SAssert(size <= BlockSize);
303 #endif
304  size_t blockIdx = m_pos / BlockSize;
305  size_t offset = m_pos % BlockSize;
306  T *result;
307  if (EXPECT_TAKEN(offset + size <= BlockSize)) {
308  if (blockIdx == m_blocks.size())
309  m_blocks.push_back(new T[BlockSize]);
310  result = m_blocks[blockIdx] + offset;
311  m_pos += size;
312  } else {
313  ++blockIdx;
314  if (blockIdx == m_blocks.size())
315  m_blocks.push_back(new T[BlockSize]);
316  result = m_blocks[blockIdx];
317  m_pos += BlockSize - offset + size;
318  }
319  return result;
320  }
321 
322  inline T &operator[](size_t index) {
323  return *(m_blocks[index / BlockSize] +
324  (index % BlockSize));
325  }
326 
327  inline const T &operator[](size_t index) const {
328  return *(m_blocks[index / BlockSize] +
329  (index % BlockSize));
330  }
331 
332 
333  /**
334  * \brief Return the currently used number of items
335  */
336  inline size_t size() const {
337  return m_pos;
338  }
339 
340  /**
341  * \brief Return the number of allocated blocks
342  */
343  inline size_t blockCount() const {
344  return m_blocks.size();
345  }
346 
347  /**
348  * \brief Return the total capacity
349  */
350  inline size_t capacity() const {
351  return m_blocks.size() * BlockSize;
352  }
353 
354  /**
355  * \brief Resize the vector to the given size.
356  *
357  * Note: this implementation doesn't support
358  * enlarging the vector and simply changes the
359  * last item pointer.
360  */
361  inline void resize(size_t pos) {
362 #if defined(MTS_KD_DEBUG)
363  SAssert(pos <= capacity());
364 #endif
365  m_pos = pos;
366  }
367 
368  /**
369  * \brief Release all memory
370  */
371  void clear() {
372  for (typename std::vector<T *>::iterator it = m_blocks.begin();
373  it != m_blocks.end(); ++it)
374  delete[] *it;
375  m_blocks.clear();
376  m_pos = 0;
377  }
378 private:
379  std::vector<T *> m_blocks;
380  size_t m_pos;
381 };
382 
383 /**
384  * \brief Compact storage for primitive classification
385  *
386  * When classifying primitives with respect to a split plane,
387  * a data structure is needed to hold the ternary result of
388  * this operation. This class implements a compact storage
389  * (2 bits per entry) in the spirit of the std::vector<bool>
390  * specialization.
391  *
392  * \author Wenzel Jakob
393  */
395 public:
397  : m_buffer(NULL), m_bufferSize(0) { }
398 
400  if (m_buffer)
401  delete[] m_buffer;
402  }
403 
404  void setPrimitiveCount(size_t size) {
405  if (m_buffer)
406  delete[] m_buffer;
407  if (size > 0) {
408  m_bufferSize = size/4 + ((size % 4) > 0 ? 1 : 0);
409  m_buffer = new uint8_t[m_bufferSize];
410  } else {
411  m_buffer = NULL;
412  }
413  }
414 
415  inline void set(uint32_t index, int value) {
416  uint8_t *ptr = m_buffer + (index >> 2);
417  uint8_t shift = (index & 3) << 1;
418  *ptr = (*ptr & ~(3 << shift)) | (value << shift);
419  }
420 
421  inline int get(uint32_t index) const {
422  uint8_t *ptr = m_buffer + (index >> 2);
423  uint8_t shift = (index & 3) << 1;
424  return (*ptr >> shift) & 3;
425  }
426 
427  inline size_t size() const {
428  return m_bufferSize;
429  }
430 private:
431  uint8_t *m_buffer;
432  size_t m_bufferSize;
433 };
434 
435 /**
436  * \brief Base class of all kd-trees.
437  *
438  * This class defines the byte layout for KD-tree nodes and
439  * provides methods for querying the tree structure.
440  */
441 template <typename AABBType> class KDTreeBase : public Object {
442 public:
443  /// Index number format (max 2^32 prims)
445 
446  /// Size number format
448 
449  /**
450  * \brief KD-tree node in 8 bytes.
451  */
452  struct KDNode {
453  union {
454  /* Inner node */
455  struct {
456  /* Bit layout:
457  31 : False (inner node)
458  30 : Indirection node flag
459  29-3 : Offset to the left child
460  or indirection table entry
461  2-0 : Split axis
462  */
464 
465  /// Split plane coordinate
466  float split;
467  } inner;
468 
469  /* Leaf node */
470  struct {
471  /* Bit layout:
472  31 : True (leaf node)
473  30-0 : Offset to the node's primitive list
474  */
475  uint32_t combined;
476 
477  /// End offset of the primitive list
479  } leaf;
480  };
481 
482  enum EMask {
483  ETypeMask = 1 << 31,
484  EIndirectionMask = 1 << 30,
485  ELeafOffsetMask = ~ETypeMask,
486  EInnerAxisMask = 0x3,
487  EInnerOffsetMask = ~(EInnerAxisMask + EIndirectionMask),
488  ERelOffsetLimit = (1<<28) - 1
489  };
490 
491  /// Initialize a leaf kd-Tree node
492  inline void initLeafNode(unsigned int offset, unsigned int numPrims) {
493  leaf.combined = (uint32_t) ETypeMask | offset;
494  leaf.end = offset + numPrims;
495  }
496 
497  /**
498  * Initialize an interior kd-Tree node. Reports a failure if the
499  * relative offset to the left child node is too large.
500  */
501  inline bool initInnerNode(int axis, float split, ptrdiff_t relOffset) {
502  if (relOffset < 0 || relOffset > ERelOffsetLimit)
503  return false;
504  inner.combined = axis | ((uint32_t) relOffset << 2);
505  inner.split = split;
506  return true;
507  }
508 
509  /**
510  * \brief Initialize an interior indirection node.
511  *
512  * Indirections are necessary whenever the children cannot be
513  * referenced using a relative pointer, which can happen when
514  * they lie in different memory chunks. In this case, the node
515  * stores an index into a globally shared pointer list.
516  */
517  inline void initIndirectionNode(int axis, float split,
518  uint32_t indirectionEntry) {
519  inner.combined = EIndirectionMask
520  | ((uint32_t) indirectionEntry << 2)
521  | axis;
522  inner.split = split;
523  }
524 
525  /// Is this a leaf node?
526  FINLINE bool isLeaf() const {
527  return leaf.combined & (uint32_t) ETypeMask;
528  }
529 
530  /// Is this an indirection node?
531  FINLINE bool isIndirection() const {
532  return leaf.combined & (uint32_t) EIndirectionMask;
533  }
534 
535  /// Assuming this is a leaf node, return the first primitive index
536  FINLINE IndexType getPrimStart() const {
537  return leaf.combined & (uint32_t) ELeafOffsetMask;
538  }
539 
540  /// Assuming this is a leaf node, return the last primitive index
541  FINLINE IndexType getPrimEnd() const {
542  return leaf.end;
543  }
544 
545  /// Return the index of an indirection node
546  FINLINE IndexType getIndirectionIndex() const {
547  return(inner.combined & (uint32_t) EInnerOffsetMask) >> 2;
548  }
549 
550  /// Return the left child (assuming that this is an interior node)
551  FINLINE const KDNode * __restrict getLeft() const {
552  return this +
553  ((inner.combined & (uint32_t) EInnerOffsetMask) >> 2);
554  }
555 
556  /// Return the sibling of the current node
557  FINLINE const KDNode * __restrict getSibling() const {
558  return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8);
559  }
560 
561  /// Return the left child (assuming that this is an interior node)
562  FINLINE KDNode * __restrict getLeft() {
563  return this +
564  ((inner.combined & (uint32_t) EInnerOffsetMask) >> 2);
565  }
566 
567  /// Return the left child (assuming that this is an interior node)
568  FINLINE const KDNode * __restrict getRight() const {
569  return getLeft() + 1;
570  }
571 
572  /**
573  * \brief Return the split plane location (assuming that this
574  * is an interior node)
575  */
576  FINLINE float getSplit() const {
577  return inner.split;
578  }
579 
580  /// Return the split axis (assuming that this is an interior node)
581  FINLINE int getAxis() const {
582  return inner.combined & (uint32_t) EInnerAxisMask;
583  }
584 
585  /// Return a string representation
586  std::string toString() const {
587  std::ostringstream oss;
588  if (isLeaf()) {
589  oss << "KDNode[leaf, primStart=" << getPrimStart()
590  << ", primCount=" << getPrimEnd()-getPrimStart() << "]";
591  } else {
592  oss << "KDNode[interior, axis=" << getAxis()
593  << ", split=" << getAxis()
594  << ", leftOffset="
595  << ((inner.combined & EInnerOffsetMask) >> 2)
596  << "]";
597  }
598  return oss.str();
599  }
600  };
601  BOOST_STATIC_ASSERT(sizeof(KDNode) == 8);
602 
603  /// Return the log level of kd-tree status messages
604  inline ELogLevel getLogLevel() const { return m_logLevel; }
605 
606  /// Return the log level of kd-tree status messages
607  inline void setLogLevel(ELogLevel level) { m_logLevel = level; }
608 
609  /// Return the root node of the kd-tree
610  inline const KDNode *getRoot() const {
611  return m_nodes;
612  }
613 
614  /// Return whether or not the kd-tree has been built
615  inline bool isBuilt() const {
616  return m_nodes != NULL;
617  }
618 
619  /// Return a (slightly enlarged) axis-aligned bounding box containing all primitives
620  inline const AABBType &getAABB() const { return m_aabb; }
621 
622  /// Return a tight axis-aligned bounding box containing all primitives
623  inline const AABBType &getTightAABB() const { return m_tightAABB;}
624 
626 protected:
627  virtual ~KDTreeBase() { }
628 protected:
629  KDNode *m_nodes;
631  AABBType m_aabb, m_tightAABB;
632 };
633 
634 #if defined(_MSC_VER) && !defined(__INTELLISENSE__)
635 /* Use strict IEEE 754 floating point computations
636  for the following kd-tree building code */
638 #pragma float_control(precise, on)
640 #endif
641 
642 #define KDLog(level, fmt, ...) Thread::getThread()->getLogger()->log(\
643  level, KDTreeBase<AABB>::m_theClass, __FILE__, __LINE__, \
644  fmt, ## __VA_ARGS__)
645 
646 /**
647  * \brief Optimized KD-tree acceleration data structure for
648  * n-dimensional (n<=4) shapes and various queries on them.
649  *
650  * Note that this class mainly concerns itself with data that cover a
651  * region of space. For point data, other implementations will be more
652  * suitable. The most important application in Mitsuba is the fast
653  * construction of high-quality trees for ray tracing. See the class
654  * \ref SAHKDTree for this specialization.
655  *
656  * The code in this class is a fully generic kd-tree implementation, which
657  * can theoretically support any kind of shape. However, subclasses still
658  * need to provide the following signatures for a functional implementation:
659  *
660  * \code
661  * /// Return the total number of primitives
662  * inline SizeType getPrimitiveCount() const;
663  *
664  * /// Return the axis-aligned bounding box of a certain primitive
665  * inline AABB getAABB(IndexType primIdx) const;
666  *
667  * /// Return the AABB of a primitive when clipped to another AABB
668  * inline AABB getClippedAABB(IndexType primIdx, const AABBType &aabb) const;
669  * \endcode
670  *
671  * This class follows the "Curiously recurring template" design pattern
672  * so that the above functions can be inlined (in particular, no virtual
673  * calls will be necessary!).
674  *
675  * When the kd-tree is initially built, this class optimizes a cost
676  * heuristic every time a split plane has to be chosen. For ray tracing,
677  * the heuristic is usually the surface area heuristic (SAH), but other
678  * choices are possible as well. The tree construction heuristic must be
679  * passed as a template argument, which can use a supplied AABB and
680  * split candidate to compute approximate probabilities of recursing into
681  * the left and right subtrees during a typical kd-tree query operation.
682  * See \ref SurfaceAreaHeuristic for an example of the interface that
683  * must be implemented.
684  *
685  * The kd-tree construction algorithm creates 'perfect split' trees as
686  * outlined in the paper "On Building fast kd-Trees for Ray Tracing, and on
687  * doing that in O(N log N)" by Ingo Wald and Vlastimil Havran. This works
688  * even when the tree is not meant to be used for ray tracing.
689  * For polygonal meshes, the involved Sutherland-Hodgman iterations can be
690  * quite expensive in terms of the overall construction time. The
691  * \ref setClip method can be used to deactivate perfect splits at the
692  * cost of a lower-quality tree.
693  *
694  * Because the O(N log N) construction algorithm tends to cause many
695  * incoherent memory accesses, a fast approximate technique (Min-max
696  * binning) is used near the top of the tree, which significantly reduces
697  * cache misses. Once the input data has been narrowed down to a
698  * reasonable amount, the implementation switches over to the O(N log N)
699  * builder. When multiple processors are available, the build process runs
700  * in parallel.
701  *
702  * \author Wenzel Jakob
703  * \ingroup librender
704  */
705 template <typename AABBType, typename TreeConstructionHeuristic, typename Derived>
706  class GenericKDTree : public KDTreeBase<AABBType> {
707 protected:
708  // Some forward declarations
709  struct MinMaxBins;
710  struct EdgeEvent;
711  struct EdgeEventOrdering;
712 
713 public:
715  typedef typename Parent::SizeType SizeType;
716  typedef typename Parent::IndexType IndexType;
717  typedef typename Parent::KDNode KDNode;
718  typedef typename AABBType::Scalar Scalar;
719  typedef typename AABBType::PointType PointType;
720  typedef typename AABBType::VectorType VectorType;
721 
722  using Parent::m_nodes;
723  using Parent::m_aabb;
724  using Parent::m_tightAABB;
725  using Parent::m_logLevel;
726  using Parent::isBuilt;
727 
728  /**
729  * \brief Create a new kd-tree instance initialized with
730  * the default parameters.
731  */
732  GenericKDTree() : m_indices(NULL) {
733  m_nodes = NULL;
734  m_traversalCost = 15;
735  m_queryCost = 20;
736  m_emptySpaceBonus = 0.9f;
737  m_clip = true;
738  m_stopPrims = 6;
739  m_maxBadRefines = 3;
740  m_exactPrimThreshold = 65536;
741  m_maxDepth = 0;
742  m_retract = true;
743  m_parallelBuild = true;
744  m_minMaxBins = 128;
745  m_logLevel = EDebug;
746  }
747 
748  /**
749  * \brief Release all memory
750  */
751  virtual ~GenericKDTree() {
752  if (m_indices)
753  delete[] m_indices;
754  if (m_nodes)
755  freeAligned(m_nodes-1); // undo alignment shift
756  }
757 
758  /**
759  * \brief Set the traversal cost used by the tree construction heuristic
760  */
761  inline void setTraversalCost(Float traversalCost) {
762  m_traversalCost = traversalCost;
763  }
764 
765  /**
766  * \brief Returns the underlying kd-tree index buffer
767  */
768  inline IndexType *getIndices() const { return m_indices; }
769 
770  /**
771  * \brief Return the traversal cost used by the tree construction heuristic
772  */
773  inline Float getTraversalCost() const {
774  return m_traversalCost;
775  }
776 
777  /**
778  * \brief Set the query cost used by the tree construction heuristic
779  * (This is the average cost for testing a contained shape against
780  * a kd-tree search query)
781  */
782  inline void setQueryCost(Float queryCost) {
783  m_queryCost = queryCost;
784  }
785 
786  /**
787  * \brief Return the query cost used by the tree construction heuristic
788  * (This is the average cost for testing a contained shape against
789  * a kd-tree search query)
790  */
791  inline Float getQueryCost() const {
792  return m_queryCost;
793  }
794 
795  /**
796  * \brief Set the bonus factor for empty space used by the
797  * tree construction heuristic
798  */
799  inline void setEmptySpaceBonus(Float emptySpaceBonus) {
800  m_emptySpaceBonus = emptySpaceBonus;
801  }
802 
803  /**
804  * \brief Return the bonus factor for empty space used by the
805  * tree construction heuristic
806  */
807  inline Float getEmptySpaceBonus() const {
808  return m_emptySpaceBonus;
809  }
810 
811  /**
812  * \brief Set the maximum tree depth (0 = use heuristic)
813  */
814  inline void setMaxDepth(SizeType maxDepth) {
815  m_maxDepth = maxDepth;
816  }
817 
818  /**
819  * \brief Set the number of bins used for Min-Max binning
820  */
821  inline void setMinMaxBins(SizeType minMaxBins) {
822  m_minMaxBins = minMaxBins;
823  }
824 
825  /**
826  * \brief Return the number of bins used for Min-Max binning
827  */
828  inline SizeType getMinMaxBins() const {
829  return m_minMaxBins;
830  }
831 
832  /**
833  * \brief Return maximum tree depth (0 = use heuristic)
834  */
835  inline SizeType getMaxDepth() const {
836  return m_maxDepth;
837  }
838 
839  /**
840  * \brief Specify whether or not to use primitive clipping will
841  * be used in the tree construction.
842  */
843  inline void setClip(bool clip) {
844  m_clip = clip;
845  }
846 
847  /**
848  * \brief Return whether or not to use primitive clipping will
849  * be used in the tree construction.
850  */
851  inline bool getClip() const {
852  return m_clip;
853  }
854 
855  /**
856  * \brief Specify whether or not bad splits can be "retracted".
857  */
858  inline void setRetract(bool retract) {
859  m_retract = retract;
860  }
861 
862  /**
863  * \brief Return whether or not bad splits can be "retracted".
864  */
865  inline bool getRetract() const {
866  return m_retract;
867  }
868 
869  /**
870  * \brief Set the number of bad refines allowed to happen
871  * in succession before a leaf node will be created.
872  */
873  inline void setMaxBadRefines(SizeType maxBadRefines) {
874  m_maxBadRefines = maxBadRefines;
875  }
876 
877  /**
878  * \brief Return the number of bad refines allowed to happen
879  * in succession before a leaf node will be created.
880  */
881  inline SizeType getMaxBadRefines() const {
882  return m_maxBadRefines;
883  }
884 
885  /**
886  * \brief Set the number of primitives, at which recursion will
887  * stop when building the tree.
888  */
889  inline void setStopPrims(SizeType stopPrims) {
890  m_stopPrims = stopPrims;
891  }
892 
893  /**
894  * \brief Return the number of primitives, at which recursion will
895  * stop when building the tree.
896  */
897  inline SizeType getStopPrims() const {
898  return m_stopPrims;
899  }
900 
901  /**
902  * \brief Specify whether or not tree construction
903  * should run in parallel.
904  */
905  inline void setParallelBuild(bool parallel) {
906  m_parallelBuild = parallel;
907  }
908 
909  /**
910  * \brief Return whether or not tree construction
911  * will run in parallel.
912  */
913  inline bool getParallelBuild() const {
914  return m_parallelBuild;
915  }
916 
917  /**
918  * \brief Specify the number of primitives, at which the builder will
919  * switch from (approximate) Min-Max binning to the accurate
920  * O(n log n) optimization method.
921  */
922  inline void setExactPrimitiveThreshold(SizeType exactPrimThreshold) {
923  m_exactPrimThreshold = exactPrimThreshold;
924  }
925 
926  /**
927  * \brief Return the number of primitives, at which the builder will
928  * switch from (approximate) Min-Max binning to the accurate
929  * O(n log n) optimization method.
930  */
932  return m_exactPrimThreshold;
933  }
934 protected:
935  /**
936  * \brief Once the tree has been constructed, it is rewritten into
937  * a more convenient binary storage format.
938  *
939  * This data structure is used to store temporary information about
940  * this process.
941  */
942  struct RewriteItem {
943  const KDNode *node;
946  AABBType aabb;
947 
948  RewriteItem(const KDNode *node, KDNode *target,
949  const typename GenericKDTree::BuildContext *context, const AABBType &aabb) :
950  node(node), target(target), context(context), aabb(aabb) { }
951  };
952 
953  /**
954  * \brief Build a KD-tree over the supplied geometry
955  *
956  * To be called by the subclass.
957  */
958  void buildInternal() {
959  /* Some sanity checks */
960  if (isBuilt())
961  KDLog(EError, "The kd-tree has already been built!");
962  if (m_traversalCost <= 0)
963  KDLog(EError, "The traveral cost must be > 0");
964  if (m_queryCost <= 0)
965  KDLog(EError, "The query cost must be > 0");
966  if (m_emptySpaceBonus <= 0 || m_emptySpaceBonus > 1)
967  KDLog(EError, "The empty space bonus must be in [0, 1]");
968  if (m_minMaxBins <= 1)
969  KDLog(EError, "The number of min-max bins must be > 2");
970 
971  SizeType primCount = cast()->getPrimitiveCount();
972  if (primCount == 0) {
973  KDLog(EWarn, "kd-tree contains no geometry!");
974  // +1 shift is for alignment purposes (see KDNode::getSibling)
975  m_nodes = static_cast<KDNode *>(allocAligned(sizeof(KDNode) * 2))+1;
976  m_nodes[0].initLeafNode(0, 0);
977  return;
978  }
979 
980  if (primCount <= m_exactPrimThreshold)
981  m_parallelBuild = false;
982 
983  BuildContext ctx(primCount, m_minMaxBins);
984 
985  /* Establish an ad-hoc depth cutoff value (Formula from PBRT) */
986  if (m_maxDepth == 0)
987  m_maxDepth = (int) (8 + 1.3f * math::log2i(primCount));
988  m_maxDepth = std::min(m_maxDepth, (SizeType) MTS_KD_MAXDEPTH);
989 
990  KDLog(m_logLevel, "Creating a preliminary index list (%s)",
991  memString(primCount * sizeof(IndexType)).c_str());
992 
993  OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
994  IndexType *indices = leftAlloc.allocate<IndexType>(primCount);
995 
996  ref<Timer> timer = new Timer();
997  AABBType &aabb = m_aabb;
998  aabb.reset();
999  for (IndexType i=0; i<primCount; ++i) {
1000  aabb.expandBy(cast()->getAABB(i));
1001  indices[i] = i;
1002  }
1003 
1004  #if defined(DOUBLE_PRECISION)
1005  for (int i=0; i<3; ++i) {
1006  aabb.min[i] = math::castflt_down(aabb.min[i]);
1007  aabb.max[i] = math::castflt_up(aabb.max[i]);
1008  }
1009  #endif
1010 
1011  KDLog(m_logLevel, "Computed scene bounds in %i ms",
1012  timer->getMilliseconds());
1013  KDLog(m_logLevel, "");
1014 
1015  KDLog(m_logLevel, "kd-tree configuration:");
1016  KDLog(m_logLevel, " Traversal cost : %.2f", m_traversalCost);
1017  KDLog(m_logLevel, " Query cost : %.2f", m_queryCost);
1018  KDLog(m_logLevel, " Empty space bonus : %.2f", m_emptySpaceBonus);
1019  KDLog(m_logLevel, " Max. tree depth : %i", m_maxDepth);
1020  KDLog(m_logLevel, " Scene bounding box (min) : %s",
1021  aabb.min.toString().c_str());
1022  KDLog(m_logLevel, " Scene bounding box (max) : %s",
1023  aabb.max.toString().c_str());
1024  KDLog(m_logLevel, " Min-max bins : %i", m_minMaxBins);
1025  KDLog(m_logLevel, " O(n log n) method : use for <= %i primitives",
1026  m_exactPrimThreshold);
1027  KDLog(m_logLevel, " Perfect splits : %s", m_clip ? "yes" : "no");
1028  KDLog(m_logLevel, " Retract bad splits : %s",
1029  m_retract ? "yes" : "no");
1030  KDLog(m_logLevel, " Stopping primitive count : %i", m_stopPrims);
1031  KDLog(m_logLevel, " Build tree in parallel : %s",
1032  m_parallelBuild ? "yes" : "no");
1033  KDLog(m_logLevel, "");
1034 
1035  SizeType procCount = getCoreCount();
1036  if (procCount == 1)
1037  m_parallelBuild = false;
1038 
1039  if (m_parallelBuild) {
1040  m_builders.resize(procCount);
1041  for (SizeType i=0; i<procCount; ++i) {
1042  m_builders[i] = new TreeBuilder(i, this);
1043  m_builders[i]->incRef();
1044  m_builders[i]->start();
1045  }
1046  }
1047 
1048  m_indirectionLock = new Mutex();
1049  KDNode *prelimRoot = ctx.nodes.allocate(1);
1050  buildTreeMinMax(ctx, 1, prelimRoot, aabb, aabb,
1051  indices, primCount, true, 0);
1052  ctx.leftAlloc.release(indices);
1053 
1054  KDAssert(ctx.leftAlloc.used() == 0);
1055  KDAssert(ctx.rightAlloc.used() == 0);
1056 
1057  if (m_parallelBuild) {
1058  UniqueLock lock(m_interface.mutex);
1059  m_interface.done = true;
1060  m_interface.cond->broadcast();
1061  lock.unlock();
1062  for (SizeType i=0; i<m_builders.size(); ++i)
1063  m_builders[i]->join();
1064  }
1065 
1066  KDLog(EInfo, "Finished -- took %i ms.", timer->getMilliseconds());
1067  KDLog(m_logLevel, "");
1068 
1069  KDLog(m_logLevel, "Temporary memory statistics:");
1070  KDLog(m_logLevel, " Classification storage : %s",
1071  memString((ctx.classStorage.size() * (1+procCount))).c_str());
1072  KDLog(m_logLevel, " Indirection entries : " SIZE_T_FMT " (%s)",
1073  m_indirections.size(), memString(m_indirections.capacity()
1074  * sizeof(KDNode *)).c_str());
1075 
1076  KDLog(m_logLevel, " Main thread:");
1077  ctx.printStats(m_logLevel);
1078  size_t totalUsage = m_indirections.capacity()
1079  * sizeof(KDNode *) + ctx.size();
1080 
1081  /// Clean up event lists and print statistics
1082  ctx.leftAlloc.cleanup();
1083  ctx.rightAlloc.cleanup();
1084  for (SizeType i=0; i<m_builders.size(); ++i) {
1085  KDLog(m_logLevel, " Worker thread %i:", i+1);
1086  BuildContext &subCtx = m_builders[i]->getContext();
1087  subCtx.printStats(m_logLevel);
1088  totalUsage += subCtx.size();
1089  subCtx.leftAlloc.cleanup();
1090  subCtx.rightAlloc.cleanup();
1091  ctx.accumulateStatisticsFrom(subCtx);
1092  }
1093  KDLog(m_logLevel, " Total: %s", memString(totalUsage).c_str());
1094 
1095  KDLog(m_logLevel, "");
1096  timer->reset();
1097  KDLog(m_logLevel, "Optimizing memory layout ..");
1098 
1099  Float expTraversalSteps = 0;
1100  Float expLeavesVisited = 0;
1101  Float expPrimitivesIntersected = 0;
1102  Float heuristicCost = 0;
1103 
1104  SizeType nodePtr = 0, indexPtr = 0;
1105  SizeType maxPrimsInLeaf = 0;
1106  const SizeType primBucketCount = 16;
1107  SizeType primBuckets[primBucketCount];
1108  memset(primBuckets, 0, sizeof(SizeType)*primBucketCount);
1109  m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount;
1110  m_indexCount = ctx.primIndexCount;
1111 
1112  // +1 shift is for alignment purposes (see KDNode::getSibling)
1113  m_nodes = static_cast<KDNode *> (allocAligned(
1114  sizeof(KDNode) * (m_nodeCount+1)))+1;
1115  m_indices = new IndexType[m_indexCount];
1116 
1117  /* The following code rewrites all tree nodes with proper relative
1118  indices. It also computes the final tree cost and some other
1119  useful heuristics */
1120  std::stack<RewriteItem> stack;
1121  stack.push(RewriteItem(prelimRoot, &m_nodes[nodePtr++], &ctx, aabb));
1122  while (!stack.empty()) {
1123  RewriteItem item = stack.top();
1124  stack.pop();
1125 
1126  typename std::map<const KDNode *, IndexType>::const_iterator it
1127  = m_interface.threadMap.find(item.node);
1128  // Check if we're switching to a subtree built by a worker thread
1129  if (it != m_interface.threadMap.end())
1130  item.context = &m_builders[(*it).second]->getContext();
1131 
1132  if (item.node->isLeaf()) {
1133  SizeType primStart = item.node->getPrimStart(),
1134  primEnd = item.node->getPrimEnd(),
1135  primsInLeaf = primEnd-primStart;
1136  item.target->initLeafNode(indexPtr, primsInLeaf);
1137 
1138  Float quantity = TreeConstructionHeuristic::getQuantity(item.aabb),
1139  weightedQuantity = quantity * primsInLeaf;
1140  expLeavesVisited += quantity;
1141  expPrimitivesIntersected += weightedQuantity;
1142  heuristicCost += weightedQuantity * m_queryCost;
1143  if (primsInLeaf < primBucketCount)
1144  primBuckets[primsInLeaf]++;
1145  if (primsInLeaf > maxPrimsInLeaf)
1146  maxPrimsInLeaf = primsInLeaf;
1147 
1149  = item.context->indices;
1150  for (SizeType idx = primStart; idx<primEnd; ++idx) {
1151  KDAssert(indices[idx] >= 0 && indices[idx] < primCount);
1152  m_indices[indexPtr++] = indices[idx];
1153  }
1154  } else {
1155  Float quantity = TreeConstructionHeuristic::getQuantity(item.aabb);
1156  expTraversalSteps += quantity;
1157  heuristicCost += quantity * m_traversalCost;
1158 
1159  const KDNode *left;
1160  if (EXPECT_TAKEN(!item.node->isIndirection()))
1161  left = item.node->getLeft();
1162  else
1163  left = m_indirections[item.node->getIndirectionIndex()];
1164 
1165  KDNode *children = &m_nodes[nodePtr];
1166  nodePtr += 2;
1167  int axis = item.node->getAxis();
1168  float split = item.node->getSplit();
1169  bool result = item.target->initInnerNode(axis, split, children - item.target);
1170  if (!result)
1171  KDLog(EError, "Cannot represent relative pointer -- "
1172  "too many primitives?");
1173 
1174  Float tmp = item.aabb.min[axis];
1175  item.aabb.min[axis] = split;
1176  stack.push(RewriteItem(left+1, children+1, item.context, item.aabb));
1177  item.aabb.min[axis] = tmp;
1178  item.aabb.max[axis] = split;
1179  stack.push(RewriteItem(left, children, item.context, item.aabb));
1180  }
1181  }
1182 
1183  KDAssert(nodePtr == ctx.innerNodeCount + ctx.leafNodeCount);
1184  KDAssert(indexPtr == m_indexCount);
1185 
1186  KDLog(m_logLevel, "Finished -- took %i ms.", timer->getMilliseconds());
1187 
1188  /* Free some more memory */
1189  ctx.nodes.clear();
1190  ctx.indices.clear();
1191  for (SizeType i=0; i<m_builders.size(); ++i) {
1192  BuildContext &subCtx = m_builders[i]->getContext();
1193  subCtx.nodes.clear();
1194  subCtx.indices.clear();
1195  }
1196  m_indirectionLock = NULL;
1197  std::vector<KDNode *>().swap(m_indirections);
1198 
1199  if (m_builders.size() > 0) {
1200  for (SizeType i=0; i<m_builders.size(); ++i)
1201  m_builders[i]->decRef();
1202  m_builders.clear();
1203  }
1204 
1205  KDLog(m_logLevel, "");
1206 
1207  Float rootQuantity = TreeConstructionHeuristic::getQuantity(aabb);
1208  expTraversalSteps /= rootQuantity;
1209  expLeavesVisited /= rootQuantity;
1210  expPrimitivesIntersected /= rootQuantity;
1211  heuristicCost /= rootQuantity;
1212 
1213  /* Slightly enlarge the bounding box
1214  (necessary e.g. when the scene is planar) */
1215  m_tightAABB = aabb;
1216 
1217  const Float eps = MTS_KD_AABB_EPSILON;
1218 
1219  aabb.min -= (aabb.max-aabb.min) * eps + VectorType(eps);
1220  aabb.max += (aabb.max-aabb.min) * eps + VectorType(eps);
1221 
1222  KDLog(m_logLevel, "Structural kd-tree statistics:");
1223  KDLog(m_logLevel, " Parallel work units : " SIZE_T_FMT,
1224  m_interface.threadMap.size());
1225  KDLog(m_logLevel, " Node storage cost : %s",
1226  memString(nodePtr * sizeof(KDNode)).c_str());
1227  KDLog(m_logLevel, " Index storage cost : %s",
1228  memString(indexPtr * sizeof(IndexType)).c_str());
1229  KDLog(m_logLevel, " Inner nodes : %i", ctx.innerNodeCount);
1230  KDLog(m_logLevel, " Leaf nodes : %i", ctx.leafNodeCount);
1231  KDLog(m_logLevel, " Nonempty leaf nodes : %i",
1232  ctx.nonemptyLeafNodeCount);
1233  std::ostringstream oss;
1234  oss << " Leaf node histogram : ";
1235  for (SizeType i=0; i<primBucketCount; i++) {
1236  oss << i << "(" << primBuckets[i] << ") ";
1237  if ((i+1)%4==0 && i+1<primBucketCount) {
1238  KDLog(m_logLevel, "%s", oss.str().c_str());
1239  oss.str("");
1240  oss << " ";
1241  }
1242  }
1243  KDLog(m_logLevel, "%s", oss.str().c_str());
1244  KDLog(m_logLevel, "");
1245  KDLog(m_logLevel, "Qualitative kd-tree statistics:");
1246  KDLog(m_logLevel, " Retracted splits : %i", ctx.retractedSplits);
1247  KDLog(m_logLevel, " Pruned primitives : %i", ctx.pruned);
1248  KDLog(m_logLevel, " Largest leaf node : %i primitives",
1249  maxPrimsInLeaf);
1250  KDLog(m_logLevel, " Avg. prims/nonempty leaf : %.2f",
1251  ctx.primIndexCount / (Float) ctx.nonemptyLeafNodeCount);
1252  KDLog(m_logLevel, " Expected traversals/query : %.2f", expTraversalSteps);
1253  KDLog(m_logLevel, " Expected leaf visits/query : %.2f", expLeavesVisited);
1254  KDLog(m_logLevel, " Expected prim. visits/query : %.2f",
1255  expPrimitivesIntersected);
1256  KDLog(m_logLevel, " Final cost : %.2f", heuristicCost);
1257  KDLog(m_logLevel, "");
1258 
1259  #if defined(__LINUX__)
1260  /* Forcefully release Heap memory back to the OS */
1261  malloc_trim(0);
1262  #endif
1263  }
1264 
1265 protected:
	/// Primitive classification during tree-construction
	// NOTE(review): the opening 'enum ... {' line of this classification
	// enumeration appears to be missing from this extract (likely lost
	// during HTML extraction) -- confirm against the original header.
		/// Straddling primitive
		EBothSides = 0,
		/// Primitive is entirely on the left side of the split
		ELeftSide = 1,
		/// Primitive is entirely on the right side of the split
		ERightSide = 2,
		/// Edge events have been generated for the straddling primitive
		EBothSidesProcessed = 3
	};
1277 
	/**
	 * \brief Describes the beginning or end of a primitive
	 * when projected onto a certain dimension.
	 */
	struct EdgeEvent {
		/// Possible event types
		enum EEventType {
			/* The numeric ordering (end < planar < start) is significant:
			   EdgeEventOrdering sorts by this value, and the plane-sweep
			   in buildTree() counts end, planar and start events in
			   exactly this order at each plane position */
			EEdgeEnd = 0,
			EEdgePlanar = 1,
			EEdgeStart = 2
		};

		/// Dummy constructor (leaves all fields uninitialized)
		inline EdgeEvent() { }

		/// Create a new edge event
		inline EdgeEvent(uint16_t type, int axis, float pos, IndexType index)
			: pos(pos), index(index), type(type), axis(axis) { }

		/// Return a human-readable string representation (for debugging)
		std::string toString() const {
			std::ostringstream oss;
			oss << "EdgeEvent[" << endl
				<< " pos = " << pos << "," << endl
				<< " index = " << index << "," << endl
				<< " type = ";
			if (type == EEdgeEnd)
				oss << "end";
			else if (type == EEdgePlanar)
				oss << "planar";
			else if (type == EEdgeStart)
				oss << "start";
			else
				oss << "unknown!";
			oss << "," << endl
				<< " axis = " << axis << endl
				<<"]";
			return oss.str();
		}

		/// Plane position
		float pos;
		/// Primitive index
		// NOTE(review): the declaration of the primitive index member
		// (an IndexType, initialized by the constructor above) appears
		// to be missing from this extract -- confirm against the
		// original header.
		/// Event type: end/planar/start
		unsigned int type:2;
		/// Event axis
		unsigned int axis:2;
	};

	/* The event layout is expected to pack into exactly 12 bytes
	   (float position + 32-bit index + the two 2-bit bitfields) */
	BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
1329 
1330  /// Edge event comparison functor
1331  struct EdgeEventOrdering : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
1332  inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const {
1333  if (a.axis != b.axis)
1334  return a.axis < b.axis;
1335  if (a.pos != b.pos)
1336  return a.pos < b.pos;
1337  if (a.type != b.type)
1338  return a.type < b.type;
1339  return a.index < b.index;
1340  }
1341  };
1342 
	/**
	 * \brief Data type for split candidates computed by
	 * the O(n log n) greedy optimization method.
	 * */
	// NOTE(review): the opening 'struct SplitCandidate {' line and the
	// declarations of the 'cost' (Float) and 'planarLeft' (bool) members
	// appear to be missing from this extract -- the constructor below
	// initializes both; confirm against the original header.
		/// Position of the split plane
		float pos;
		/// Axis that the split plane is perpendicular to
		int axis;
		/// Number of primitives on either side of the plane
		SizeType numLeft, numRight;
		/// Associated min-max bin index (-1 when unset) -- presumably; verify
		int leftBin;

		/// Construct an invalid split candidate (infinite cost)
		inline SplitCandidate() :
			cost(std::numeric_limits<Float>::infinity()),
			pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false), leftBin(-1) {
		}

		/// Return a human-readable string representation (for debugging)
		std::string toString() const {
			std::ostringstream oss;
			oss << "SplitCandidate[" << endl
				<< " cost=" << cost << "," << endl
				<< " pos=" << pos << "," << endl
				<< " axis=" << axis << "," << endl
				<< " numLeft=" << numLeft << "," << endl
				<< " numRight=" << numRight << "," << endl
				<< " leftBin=" << leftBin << "," << endl
				<< " planarLeft=" << (planarLeft ? "yes" : "no") << endl
				<< "]";
			return oss.str();
		}
	};
1374 
	/**
	 * \brief Per-thread context used to manage memory allocations,
	 * also records some useful statistics.
	 */
	struct BuildContext {
	// NOTE(review): the member declarations used below (the left/right
	// OrderedChunkAllocators, the blocked 'nodes'/'indices' vectors,
	// 'classStorage', 'minMaxBins' and the statistics counters) appear
	// to be missing from this extract -- confirm against the original
	// header.

		/// Create a context for \c primCount primitives and a min-max
		/// binning scheme with \c binCount bins; all counters start at zero
		BuildContext(SizeType primCount, SizeType binCount)
			: minMaxBins(binCount) {
			classStorage.setPrimitiveCount(primCount);
			leafNodeCount = 0;
			nonemptyLeafNodeCount = 0;
			innerNodeCount = 0;
			primIndexCount = 0;
			retractedSplits = 0;
			pruned = 0;
		}

		/// Total temporary memory (in bytes) currently held by this context
		size_t size() {
			return leftAlloc.size() + rightAlloc.size()
				+ nodes.capacity() * sizeof(KDNode)
				+ indices.capacity() * sizeof(IndexType)
				+ classStorage.size();
		}

		/// Log this context's memory usage at the given log level
		void printStats(ELogLevel level) {
			KDLog(level, " Left events : " SIZE_T_FMT " chunks (%s)",
				leftAlloc.getChunkCount(),
				memString(leftAlloc.size()).c_str());
			KDLog(level, " Right events : " SIZE_T_FMT " chunks (%s)",
				rightAlloc.getChunkCount(),
				memString(rightAlloc.size()).c_str());
			KDLog(level, " kd-tree nodes : " SIZE_T_FMT " entries, "
				SIZE_T_FMT " blocks (%s)", nodes.size(), nodes.blockCount(),
				memString(nodes.capacity() * sizeof(KDNode)).c_str());
			KDLog(level, " Indices : " SIZE_T_FMT " entries, "
				SIZE_T_FMT " blocks (%s)", indices.size(),
				indices.blockCount(), memString(indices.capacity()
				* sizeof(IndexType)).c_str());
		}

		/// Merge the statistics counters of another (worker-thread) context
		// NOTE(review): this function's signature line appears to be
		// missing from this extract -- confirm against the original header.
			leafNodeCount += ctx.leafNodeCount;
			nonemptyLeafNodeCount += ctx.nonemptyLeafNodeCount;
			innerNodeCount += ctx.innerNodeCount;
			primIndexCount += ctx.primIndexCount;
			retractedSplits += ctx.retractedSplits;
			pruned += ctx.pruned;
		}
	};
1436 
	/**
	 * \brief Communication data structure used to pass jobs to
	 * kd-tree builder threads
	 */
	// NOTE(review): the opening 'struct BuildInterface {' line and several
	// member declarations (the mutex, the two condition variables, the
	// 'node' pointer, 'primCount' and 'badRefines') appear to be missing
	// from this extract -- the constructor below initializes them;
	// confirm against the original header.
		/* Communication */
		/// Maps each dispatched subtree root to the id of the worker that built it
		std::map<const KDNode *, IndexType> threadMap;
		/// Set to true (while holding the mutex) once the build has finished
		bool done;

		/* Job description for building a subtree */
		int depth;
		AABBType nodeAABB;
		EdgeEvent *eventStart, *eventEnd;

		/// Initialize the synchronization primitives; no job is pending initially
		inline BuildInterface() {
			mutex = new Mutex();
			cond = new ConditionVariable(mutex);
			condJobTaken = new ConditionVariable(mutex);
			node = NULL;
			done = false;
		}
	};
1464 
	/**
	 * \brief kd-tree builder thread
	 *
	 * Each worker repeatedly pulls a subtree-construction job from the
	 * shared \ref BuildInterface and runs the O(n log n) builder on it
	 * using its own thread-local \ref BuildContext.
	 */
	class TreeBuilder : public Thread {
	public:
		// NOTE(review): the constructor's signature line (apparently taking
		// the worker id and a pointer to the parent tree) is missing from
		// this extract -- confirm against the original header.
			: Thread(formatString("bld%i", id)),
			m_id(id),
			m_parent(parent),
			m_context(parent->cast()->getPrimitiveCount(),
					  parent->getMinMaxBins()),
			m_interface(parent->m_interface) {
			setCritical(true);
		}

		// NOTE(review): the destructor's opening line is missing here; the
		// body checks that all temporary allocations were released.
			KDAssert(m_context.leftAlloc.used() == 0);
			KDAssert(m_context.rightAlloc.used() == 0);
		}

		/// Worker loop: wait for jobs, copy their event lists, build subtrees
		void run() {
			OrderedChunkAllocator &leftAlloc = m_context.leftAlloc;
			while (true) {
				UniqueLock lock(m_interface.mutex);
				// Sleep until either a job is posted or the build finishes
				while (!m_interface.done && !m_interface.node)
					m_interface.cond->wait();
				if (m_interface.done) {
					break;
				}
				int depth = m_interface.depth;
				KDNode *node = m_interface.node;
				AABBType nodeAABB = m_interface.nodeAABB;
				size_t eventCount = m_interface.eventEnd - m_interface.eventStart;
				SizeType primCount = m_interface.primCount;
				int badRefines = m_interface.badRefines;
				/* Copy the event list into thread-local storage before
				   acknowledging the job, so the posting thread may release
				   its own buffer */
				EdgeEvent *eventStart = leftAlloc.allocate<EdgeEvent>(eventCount),
					*eventEnd = eventStart + eventCount;
				memcpy(eventStart, m_interface.eventStart,
						eventCount * sizeof(EdgeEvent));
				// Record which thread owns this subtree, then signal job taken
				m_interface.threadMap[node] = m_id;
				m_interface.node = NULL;
				m_interface.condJobTaken->signal();
				lock.unlock();

				// Sorting happens outside the critical section
				std::sort(eventStart, eventEnd, EdgeEventOrdering());
				m_parent->buildTree(m_context, depth, node,
					nodeAABB, eventStart, eventEnd, primCount, true, badRefines);
				leftAlloc.release(eventStart);
			}
		}

		/// Return this worker's thread-local build context
		// NOTE(review): this accessor's signature line is missing from the
		// extract (it returns a BuildContext reference) -- confirm against
		// the original header.
			return m_context;
		}

	private:
		IndexType m_id;
		GenericKDTree *m_parent;
		BuildContext m_context;
		BuildInterface &m_interface;
	};
1526 
	/// Cast to the derived class (static downcast; \c Derived is assumed
	/// to be the actual dynamic type, as in the usual CRTP arrangement)
	inline Derived *cast() {
		return static_cast<Derived *>(this);
	}

	/// Cast to the derived class (const version)
	inline const Derived *cast() const {
		return static_cast<const Derived *>(this);
	}
1536 
	/// A span of edge events plus the number of primitives that generated them
	struct EventList {
	// NOTE(review): the member declarations (begin/end event pointers and
	// the primitive count) appear to be missing from this extract -- the
	// constructor below initializes them; confirm against the original
	// header.

		EventList(EdgeEvent *start, EdgeEvent *end, SizeType primCount)
			: start(start), end(end), primCount(primCount) { }
	};
1544 
1545  /**
1546  * \brief Create an edge event list for a given list of primitives.
1547  *
1548  * This is necessary when passing from Min-Max binning to the more
1549  * accurate O(n log n) optimizier.
1550  */
1551  EventList createEventList(
1552  OrderedChunkAllocator &alloc, const AABBType &nodeAABB,
1553  IndexType *prims, SizeType primCount) {
1554  SizeType initialSize = primCount * 2 * PointType::dim, actualPrimCount = 0;
1555  EdgeEvent *eventStart = alloc.allocate<EdgeEvent>(initialSize);
1556  EdgeEvent *eventEnd = eventStart;
1557 
1558  for (SizeType i=0; i<primCount; ++i) {
1559  IndexType index = prims[i];
1560  AABBType aabb;
1561  if (m_clip) {
1562  aabb = cast()->getClippedAABB(index, nodeAABB);
1563  if (!aabb.isValid() || aabb.getSurfaceArea() == 0)
1564  continue;
1565  } else {
1566  aabb = cast()->getAABB(index);
1567  }
1568 
1569  for (int axis=0; axis<PointType::dim; ++axis) {
1570  float min = math::castflt_down(aabb.min[axis]),
1571  max = math::castflt_up(aabb.max[axis]);
1572 
1573  if (min == max) {
1574  *eventEnd++ = EdgeEvent(EdgeEvent::EEdgePlanar, axis,
1575  min, index);
1576  } else {
1577  *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeStart, axis,
1578  min, index);
1579  *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeEnd, axis,
1580  max, index);
1581  }
1582  }
1583  ++actualPrimCount;
1584  }
1585 
1586  SizeType newSize = (SizeType) (eventEnd - eventStart);
1587  if (newSize != initialSize)
1588  alloc.shrinkAllocation<EdgeEvent>(eventStart, newSize);
1589 
1590  return EventList(eventStart, eventEnd, actualPrimCount);
1591  }
1592 
1593  /**
1594  * \brief Leaf node creation helper function
1595  *
1596  * \param ctx
1597  * Thread-specific build context containing allocators etc.
1598  * \param node
1599  * KD-tree node entry to be filled
1600  * \param eventStart
1601  * Start pointer of an edge event list
1602  * \param eventEnd
1603  * End pointer of an edge event list
1604  * \param primCount
1605  * Total primitive count for the current node
1606  */
1607  void createLeaf(BuildContext &ctx, KDNode *node, EdgeEvent *eventStart,
1608  EdgeEvent *eventEnd, SizeType primCount) {
1609  node->initLeafNode((SizeType) ctx.indices.size(), primCount);
1610  if (primCount > 0) {
1611  SizeType seenPrims = 0;
1612  ctx.nonemptyLeafNodeCount++;
1613 
1614  for (EdgeEvent *event = eventStart; event != eventEnd
1615  && event->axis == 0; ++event) {
1616  if (event->type == EdgeEvent::EEdgeStart ||
1617  event->type == EdgeEvent::EEdgePlanar) {
1618  ctx.indices.push_back(event->index);
1619  seenPrims++;
1620  }
1621  }
1622  KDAssert(seenPrims == primCount);
1623  ctx.primIndexCount += primCount;
1624  }
1625  ctx.leafNodeCount++;
1626  }
1627 
1628  /**
1629  * \brief Leaf node creation helper function
1630  *
1631  * \param ctx
1632  * Thread-specific build context containing allocators etc.
1633  * \param node
1634  * KD-tree node entry to be filled
1635  * \param indices
1636  * Start pointer of an index list
1637  * \param primCount
1638  * Total primitive count for the current node
1639  */
1640  void createLeaf(BuildContext &ctx, KDNode *node, SizeType *indices,
1641  SizeType primCount) {
1642  node->initLeafNode((SizeType) ctx.indices.size(), primCount);
1643  if (primCount > 0) {
1644  ctx.nonemptyLeafNodeCount++;
1645  for (SizeType i=0; i<primCount; ++i)
1646  ctx.indices.push_back(indices[i]);
1647  ctx.primIndexCount += primCount;
1648  }
1649  ctx.leafNodeCount++;
1650  }
1651 
1652  /**
1653  * \brief Leaf node creation helper function.
1654  *
1655  * Creates a unique index list by collapsing
1656  * a subtree with a bad cost.
1657  *
1658  * \param ctx
1659  * Thread-specific build context containing allocators etc.
1660  * \param node
1661  * KD-tree node entry to be filled
1662  * \param start
1663  * Start pointer of the subtree indices
1664  */
1665  void createLeafAfterRetraction(BuildContext &ctx, KDNode *node, SizeType start) {
1666  SizeType indexCount = (SizeType) (ctx.indices.size() - start);
1667  SAssert(indexCount > 0);
1668 
1669  OrderedChunkAllocator &alloc = ctx.leftAlloc;
1670 
1671  /* A temporary list is allocated to do the sorting (the indices
1672  are not guaranteed to be contiguous in memory) */
1673  IndexType *tempStart = alloc.allocate<IndexType>(indexCount),
1674  *tempEnd = tempStart + indexCount,
1675  *ptr = tempStart;
1676 
1677  for (SizeType i=start, end = start + indexCount; i<end; ++i)
1678  *ptr++ = ctx.indices[i];
1679 
1680  /* Generate an index list without duplicate entries */
1681  std::sort(tempStart, tempEnd, std::less<IndexType>());
1682  ptr = tempStart;
1683 
1684  int idx = start;
1685  while (ptr < tempEnd) {
1686  ctx.indices[idx] = *ptr++;
1687  while (ptr < tempEnd && *ptr == ctx.indices[idx])
1688  ++ptr;
1689  idx++;
1690  }
1691 
1692  int nSeen = idx-start;
1693  ctx.primIndexCount = ctx.primIndexCount - indexCount + nSeen;
1694  ctx.indices.resize(idx);
1695  alloc.release(tempStart);
1696  node->initLeafNode(start, nSeen);
1697  ctx.nonemptyLeafNodeCount++;
1698  ctx.leafNodeCount++;
1699  }
1700 
	/**
	 * \brief Implements the transition from min-max-binning to the
	 * O(n log n) optimization.
	 *
	 * \param ctx
	 *     Thread-specific build context containing allocators etc.
	 * \param depth
	 *     Current tree depth (1 == root node)
	 * \param node
	 *     KD-tree node entry to be filled
	 * \param nodeAABB
	 *     Axis-aligned bounding box of the current node
	 * \param indices
	 *     Index list of all triangles in the current node (for min-max binning)
	 * \param primCount
	 *     Total primitive count for the current node
	 * \param isLeftChild
	 *     Is this node the left child of its parent? This is important for
	 *     memory management using the \ref OrderedChunkAllocator.
	 * \param badRefines
	 *     Number of "probable bad refines" further up the tree. This makes
	 *     it possible to split along an initially bad-looking candidate in
	 *     the hope that the cost was significantly overestimated. The
	 *     counter makes sure that only a limited number of such splits can
	 *     happen in succession.
	 * \returns
	 *     Final cost of the node
	 */
	inline Float transitionToNLogN(BuildContext &ctx, unsigned int depth, KDNode *node,
			const AABBType &nodeAABB, IndexType *indices,
			SizeType primCount, bool isLeftChild, SizeType badRefines) {
		/* Allocate temporary storage from the side matching this node's
		   position in the tree (see OrderedChunkAllocator usage elsewhere) */
		OrderedChunkAllocator &alloc = isLeftChild
				? ctx.leftAlloc : ctx.rightAlloc;
		EventList events = createEventList(alloc, nodeAABB, indices, primCount);
		Float cost;
		if (m_parallelBuild) {
			/* Publish the job under the interface mutex and wake a worker */
			LockGuard lock(m_interface.mutex);
			m_interface.depth = depth;
			m_interface.node = node;
			m_interface.nodeAABB = nodeAABB;
			m_interface.eventStart = events.start;
			m_interface.eventEnd = events.end;
			m_interface.primCount = events.primCount;
			m_interface.badRefines = badRefines;
			m_interface.cond->signal();

			/* Wait for a worker thread to take this job -- the worker
			   copies the event list before clearing 'node', so releasing
			   our buffer below is safe */
			while (m_interface.node)
				m_interface.condJobTaken->wait();

			// Never tear down this subtree (return a cost of -infinity)
			cost = -std::numeric_limits<Float>::infinity();
		} else {
			/* Serial case: sort and recurse on this thread */
			std::sort(events.start, events.end, EdgeEventOrdering());

			cost = buildTree(ctx, depth, node, nodeAABB, events.start,
				events.end, events.primCount, isLeftChild, badRefines);
		}
		alloc.release(events.start);
		return cost;
	}
1762 
	/**
	 * \brief Build helper function (min-max binning)
	 *
	 * \param ctx
	 *     Thread-specific build context containing allocators etc.
	 * \param depth
	 *     Current tree depth (1 == root node)
	 * \param node
	 *     KD-tree node entry to be filled
	 * \param nodeAABB
	 *     Axis-aligned bounding box of the current node
	 * \param tightAABB
	 *     Tight bounding box of the contained geometry (for min-max binning)
	 * \param indices
	 *     Index list of all triangles in the current node (for min-max binning)
	 * \param primCount
	 *     Total primitive count for the current node
	 * \param isLeftChild
	 *     Is this node the left child of its parent? This is important for
	 *     memory management using the \ref OrderedChunkAllocator.
	 * \param badRefines
	 *     Number of "probable bad refines" further up the tree. This makes
	 *     it possible to split along an initially bad-looking candidate in
	 *     the hope that the cost was significantly overestimated. The
	 *     counter makes sure that only a limited number of such splits can
	 *     happen in succession.
	 * \returns
	 *     Final cost of the node
	 */
	Float buildTreeMinMax(BuildContext &ctx, unsigned int depth, KDNode *node,
			const AABBType &nodeAABB, const AABBType &tightAABB, IndexType *indices,
			SizeType primCount, bool isLeftChild, SizeType badRefines) {
		KDAssert(nodeAABB.contains(tightAABB));

		/* Termination criteria: few enough primitives, or depth limit hit */
		Float leafCost = primCount * m_queryCost;
		if (primCount <= m_stopPrims || depth >= m_maxDepth) {
			createLeaf(ctx, node, indices, primCount);
			return leafCost;
		}

		/* Small nodes are handed to the exact O(n log n) optimization */
		if (primCount <= m_exactPrimThreshold)
			return transitionToNLogN(ctx, depth, node, nodeAABB, indices,
				primCount, isLeftChild, badRefines);

		/* ==================================================================== */
		/*                              Binning                                 */
		/* ==================================================================== */

		ctx.minMaxBins.setAABB(tightAABB);
		ctx.minMaxBins.bin(cast(), indices, primCount);

		/* ==================================================================== */
		/*                        Split candidate search                        */
		/* ==================================================================== */
		SplitCandidate bestSplit = ctx.minMaxBins.minimizeCost(m_traversalCost,
			m_queryCost);

		if (bestSplit.cost == std::numeric_limits<Float>::infinity()) {
			/* This is bad: we have either run out of floating point precision to
			   accurately represent split planes (e.g. 'tightAABB' is almost collapsed
			   along an axis), or the compiler made overly liberal use of floating point
			   optimizations, causing the two stages of the min-max binning code to
			   become inconsistent. The two ways to proceed at this point are to
			   either create a leaf (bad) or switch over to the O(n log n) greedy
			   optimization, which is done below */
			KDLog(EWarn, "Min-max binning was unable to split %i primitives with %s "
				"-- retrying with the O(n log n) greedy optimization",
				primCount, tightAABB.toString().c_str());
			return transitionToNLogN(ctx, depth, node, nodeAABB, indices,
				primCount, isLeftChild, badRefines);
		}

		/* "Bad refines" heuristic from PBRT: permit a limited number of
		   splits that look worse than a leaf, in case their cost was
		   overestimated */
		if (bestSplit.cost >= leafCost) {
			if ((bestSplit.cost > 4 * leafCost && primCount < 16)
				|| badRefines >= m_maxBadRefines) {
				createLeaf(ctx, node, indices, primCount);
				return leafCost;
			}
			++badRefines;
		}

		/* ==================================================================== */
		/*                            Partitioning                              */
		/* ==================================================================== */

		typename MinMaxBins::Partition partition =
			ctx.minMaxBins.partition(ctx, cast(), indices, bestSplit,
			isLeftChild, m_traversalCost, m_queryCost);

		/* ==================================================================== */
		/*                              Recursion                               */
		/* ==================================================================== */

		KDNode *children = ctx.nodes.allocate(2);

		/* Snapshot the counters so a bad split can be retracted below */
		SizeType nodePosBeforeSplit = (SizeType) ctx.nodes.size();
		SizeType indexPosBeforeSplit = (SizeType) ctx.indices.size();
		SizeType leafNodeCountBeforeSplit = ctx.leafNodeCount;
		SizeType nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount;
		SizeType innerNodeCountBeforeSplit = ctx.innerNodeCount;

		if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) {
			LockGuard lock(m_indirectionLock);
			SizeType indirectionIdx = (SizeType) m_indirections.size();
			m_indirections.push_back(children);
			/* Unable to store relative offset -- create an indirection
			   table entry */
			node->initIndirectionNode(bestSplit.axis, bestSplit.pos,
				indirectionIdx);
		}
		ctx.innerNodeCount++;

		/* Left child covers [min, split] along the split axis */
		AABBType childAABB(nodeAABB);
		childAABB.max[bestSplit.axis] = bestSplit.pos;

		Float leftCost = buildTreeMinMax(ctx, depth+1, children,
			childAABB, partition.left, partition.leftIndices,
			bestSplit.numLeft, true, badRefines);

		/* Right child covers [split, max] along the split axis */
		childAABB.min[bestSplit.axis] = bestSplit.pos;
		childAABB.max[bestSplit.axis] = nodeAABB.max[bestSplit.axis];

		Float rightCost = buildTreeMinMax(ctx, depth+1, children + 1,
			childAABB, partition.right, partition.rightIndices,
			bestSplit.numRight, false, badRefines);

		TreeConstructionHeuristic tch(nodeAABB);
		std::pair<Float, Float> prob = tch(bestSplit.axis,
			bestSplit.pos - nodeAABB.min[bestSplit.axis],
			nodeAABB.max[bestSplit.axis] - bestSplit.pos);

		/* Compute the final cost given the updated cost
		   values received from the children */
		Float finalCost = m_traversalCost +
			(prob.first * leftCost + prob.second * rightCost);

		/* Release the index lists not needed by the children anymore */
		if (isLeftChild)
			ctx.rightAlloc.release(partition.rightIndices);
		else
			ctx.leftAlloc.release(partition.leftIndices);

		/* ==================================================================== */
		/*                           Final decision                             */
		/* ==================================================================== */

		if (!m_retract || finalCost < primCount * m_queryCost) {
			return finalCost;
		} else {
			/* In the end, splitting didn't help to reduce the cost.
			   Tear up everything below this node and create a leaf */
			ctx.nodes.resize(nodePosBeforeSplit);
			ctx.retractedSplits++;
			ctx.leafNodeCount = leafNodeCountBeforeSplit;
			ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit;
			ctx.innerNodeCount = innerNodeCountBeforeSplit;
			createLeafAfterRetraction(ctx, node, indexPosBeforeSplit);
			return leafCost;
		}
	}
1924 
1925  /**
1926  * \brief Build helper function (greedy O(n log n) optimization)
1927  *
1928  * \param ctx
1929  * Thread-specific build context containing allocators etc.
1930  * \param depth
1931  * Current tree depth (1 == root node)
1932  * \param node
1933  * KD-tree node entry to be filled
1934  * \param nodeAABB
1935  * Axis-aligned bounding box of the current node
1936  * \param eventStart
1937  * Pointer to the beginning of a sorted edge event list
1938  * \param eventEnd
1939  * Pointer to the end of a sorted edge event list
1940  * \param primCount
1941  * Total primitive count for the current node
1942  * \param isLeftChild
1943  * Is this node the left child of its parent? This is important for
1944  * memory management using the \ref OrderedChunkAllocator.
1945  * \param badRefines
1946  * Number of "probable bad refines" further up the tree. This makes
1947  * it possible to split along an initially bad-looking candidate in
1948  * the hope that the cost was significantly overestimated. The
1949  * counter makes sure that only a limited number of such splits can
1950  * happen in succession.
1951  * \returns
1952  * Final cost of the node
1953  */
1954  Float buildTree(BuildContext &ctx, unsigned int depth, KDNode *node,
1955  const AABBType &nodeAABB, EdgeEvent *eventStart, EdgeEvent *eventEnd,
1956  SizeType primCount, bool isLeftChild, SizeType badRefines) {
1957 
      /* Recursion termination: few enough primitives, or depth limit hit */
1958  Float leafCost = primCount * m_queryCost;
1959  if (primCount <= m_stopPrims || depth >= m_maxDepth) {
1960  createLeaf(ctx, node, eventStart, eventEnd, primCount);
1961  return leafCost;
1962  }
1963 
      /* bestSplit.cost presumably starts out at +infinity (cf. the infinity
         comparison in the "bad refines" test below), so the first in-bounds
         candidate always replaces it -- TODO confirm against SplitCandidate() */
1964  SplitCandidate bestSplit;
1965 
1966  /* ==================================================================== */
1967  /* Split candidate search */
1968  /* ==================================================================== */
1969 
1970  /* First, find the optimal splitting plane according to the
1971  tree construction heuristic. To do this in O(n), the search is
1972  implemented as a sweep over the edge events */
1973 
1974  /* Initially, the split plane is placed left of the scene
1975  and thus all geometry is on its right side */
1976  SizeType numLeft[PointType::dim],
1977  numRight[PointType::dim];
1978 
1979  for (int i=0; i<PointType::dim; ++i) {
1980  numLeft[i] = 0;
1981  numRight[i] = primCount;
1982  }
1983 
      /* Remember where each axis' event sub-list begins; the event list is
         sorted with the axis as primary key, so axis 0 starts at eventStart
         and the remaining entries are filled in during the sweep below */
1984  EdgeEvent *eventsByAxis[PointType::dim];
1985  int eventsByAxisCtr = 1;
1986  eventsByAxis[0] = eventStart;
1987  TreeConstructionHeuristic tch(nodeAABB);
1988 
1989  /* Iterate over all events on the current axis */
1990  for (EdgeEvent *event = eventStart; event < eventEnd; ) {
1991  /* Record the current position and count the number
1992  and type of remaining events, which are also here.
1993  Due to the sort ordering, there is no need to worry
1994  about an axis change in the loops below */
1995  int axis = event->axis;
1996  float pos = event->pos;
1997  SizeType numStart = 0, numEnd = 0, numPlanar = 0;
1998 
1999  /* Count "end" events */
2000  while (event < eventEnd && event->pos == pos
2001  && event->axis == axis
2002  && event->type == EdgeEvent::EEdgeEnd) {
2003  ++numEnd; ++event;
2004  }
2005 
2006  /* Count "planar" events */
2007  while (event < eventEnd && event->pos == pos
2008  && event->axis == axis
2009  && event->type == EdgeEvent::EEdgePlanar) {
2010  ++numPlanar; ++event;
2011  }
2012 
2013  /* Count "start" events */
2014  while (event < eventEnd && event->pos == pos
2015  && event->axis == axis
2016  && event->type == EdgeEvent::EEdgeStart) {
2017  ++numStart; ++event;
2018  }
2019 
2020  /* Keep track of the beginning of dimensions */
2021  if (event < eventEnd && event->axis != axis) {
2022  KDAssert(eventsByAxisCtr < PointType::dim);
2023  eventsByAxis[eventsByAxisCtr++] = event;
2024  }
2025 
2026  /* The split plane can now be moved onto 't'. Accordingly, all planar
2027  and ending primitives are removed from the right side */
2028  numRight[axis] -= numPlanar + numEnd;
2029 
      /* Candidates exactly on the node boundary cannot separate anything
         and are therefore skipped */
2030  /* Calculate a score using the tree construction heuristic */
2031  if (EXPECT_TAKEN(pos > nodeAABB.min[axis] && pos < nodeAABB.max[axis])) {
2032  const SizeType nL = numLeft[axis], nR = numRight[axis];
2033  const Float nLF = (Float) nL, nRF = (Float) nR;
2034 
2035  std::pair<Float, Float> prob = tch(axis,
2036  pos - nodeAABB.min[axis],
2037  nodeAABB.max[axis] - pos);
2038 
2039  if (numPlanar == 0) {
2040  Float cost = m_traversalCost + m_queryCost
2041  * (prob.first * nLF + prob.second * nRF);
2042  if (nL == 0 || nR == 0)
2043  cost *= m_emptySpaceBonus;
2044  if (cost < bestSplit.cost) {
2045  bestSplit.pos = pos;
2046  bestSplit.axis = axis;
2047  bestSplit.cost = cost;
2048  bestSplit.numLeft = nL;
2049  bestSplit.numRight = nR;
2050  }
2051  } else {
      /* Planar primitives can be sent to either side -- evaluate both
         assignments and keep the cheaper one */
2052  Float costPlanarLeft = m_traversalCost
2053  + m_queryCost * (prob.first * (nL+numPlanar) + prob.second * nRF);
2054  Float costPlanarRight = m_traversalCost
2055  + m_queryCost * (prob.first * nLF + prob.second * (nR+numPlanar));
2056 
2057  if (nL + numPlanar == 0 || nR == 0)
2058  costPlanarLeft *= m_emptySpaceBonus;
2059  if (nL == 0 || nR + numPlanar == 0)
2060  costPlanarRight *= m_emptySpaceBonus;
2061 
2062  if (costPlanarLeft < bestSplit.cost ||
2063  costPlanarRight < bestSplit.cost) {
2064  bestSplit.pos = pos;
2065  bestSplit.axis = axis;
2066 
2067  if (costPlanarLeft < costPlanarRight) {
2068  bestSplit.cost = costPlanarLeft;
2069  bestSplit.numLeft = nL + numPlanar;
2070  bestSplit.numRight = nR;
2071  bestSplit.planarLeft = true;
2072  } else {
2073  bestSplit.cost = costPlanarRight;
2074  bestSplit.numLeft = nL;
2075  bestSplit.numRight = nR + numPlanar;
2076  bestSplit.planarLeft = false;
2077  }
2078  }
2079  }
2080  } else {
2081  #if defined(MTS_KD_DEBUG)
2082  if (m_clip && (pos < nodeAABB.min[axis]
2083  || pos > nodeAABB.max[axis])) {
2084  /* When primitive clipping is active, this should never happen! */
2085  KDLog(EError, "Internal error: edge event is out of bounds");
2086  }
2087  #endif
2088  }
2089 
2090  /* The split plane is moved past 't'. All prims,
2091  which were planar on 't', are moved to the left
2092  side. Also, starting prims are now also left of
2093  the split plane. */
2094  numLeft[axis] += numStart + numPlanar;
2095  }
2096 
2097 #if defined(MTS_KD_DEBUG)
2098  /* Sanity checks. Everything should now be left of the split plane */
2099  for (int i=0; i<PointType::dim; ++i)
2100  KDAssert(numRight[i] == 0 && numLeft[i] == primCount);
2101  for (int i=1; i<PointType::dim; ++i)
2102  KDAssert(eventsByAxis[i]->axis == i && (eventsByAxis[i]-1)->axis == i-1);
2103 #endif
2104 
2105  /* "Bad refines" heuristic from PBRT */
2106  if (bestSplit.cost >= leafCost) {
2107  if ((bestSplit.cost > 4 * leafCost && primCount < 16)
2108  || badRefines >= m_maxBadRefines
2109  || bestSplit.cost == std::numeric_limits<Float>::infinity()) {
2110  createLeaf(ctx, node, eventStart, eventEnd, primCount);
2111  return leafCost;
2112  }
2113  ++badRefines;
2114  }
2115 
2116  /* ==================================================================== */
2117  /* Primitive Classification */
2118  /* ==================================================================== */
2119 
2120  ClassificationStorage &storage = ctx.classStorage;
2121 
      /* Only events on the chosen split axis need to be visited: every
         primitive of this node has at least one event on that axis */
2122  /* Initially mark all prims as being located on both sides */
2123  for (EdgeEvent *event = eventsByAxis[bestSplit.axis];
2124  event < eventEnd && event->axis == bestSplit.axis; ++event)
2125  storage.set(event->index, EBothSides);
2126 
2127  SizeType primsLeft = 0, primsRight = 0, primsBoth = primCount;
2128  /* Sweep over all edge events and classify the primitives wrt. the split */
2129  for (EdgeEvent *event = eventsByAxis[bestSplit.axis];
2130  event < eventEnd && event->axis == bestSplit.axis; ++event) {
2131  if (event->type == EdgeEvent::EEdgeEnd && event->pos <= bestSplit.pos) {
2132  /* The primitive's interval ends before or on the split plane
2133  -> classify to the left side */
2134  KDAssert(storage.get(event->index) == EBothSides);
2135  storage.set(event->index, ELeftSide);
2136  primsBoth--;
2137  primsLeft++;
2138  } else if (event->type == EdgeEvent::EEdgeStart
2139  && event->pos >= bestSplit.pos) {
2140  /* The primitive's interval starts after or on the split plane
2141  -> classify to the right side */
2142  KDAssert(storage.get(event->index) == EBothSides);
2143  storage.set(event->index, ERightSide);
2144  primsBoth--;
2145  primsRight++;
2146  } else if (event->type == EdgeEvent::EEdgePlanar) {
2147  /* If the planar primitive is not on the split plane, the
2148  classification is easy. Otherwise, place it on the side with
2149  the lower cost */
2150  KDAssert(storage.get(event->index) == EBothSides);
2151  if (event->pos < bestSplit.pos || (event->pos == bestSplit.pos
2152  && bestSplit.planarLeft)) {
2153  storage.set(event->index, ELeftSide);
2154  primsBoth--;
2155  primsLeft++;
2156  } else if (event->pos > bestSplit.pos
2157  || (event->pos == bestSplit.pos && !bestSplit.planarLeft)) {
2158  storage.set(event->index, ERightSide);
2159  primsBoth--;
2160  primsRight++;
2161  } else {
2162  KDAssertEx(false, "Internal error!");
2163  }
2164  }
2165  }
2166 
2167  /* Some sanity checks */
2168  KDAssert(primsLeft + primsRight + primsBoth == primCount);
2169  KDAssert(primsLeft + primsBoth == bestSplit.numLeft);
2170  KDAssert(primsRight + primsBoth == bestSplit.numRight);
2171 
      /* Memory discipline: the "left" allocator owns the event lists of left
         children, the "right" allocator those of right children. The current
         node reuses its own (parent-provided) list for the child on its own
         side and allocates fresh storage only for the other side, so that
         each allocator can release memory strictly in LIFO order */
2172  OrderedChunkAllocator &leftAlloc = ctx.leftAlloc,
2173  &rightAlloc = ctx.rightAlloc;
2174 
2175  EdgeEvent *leftEventsStart, *rightEventsStart;
2176  if (isLeftChild) {
2177  leftEventsStart = eventStart;
2178  rightEventsStart = rightAlloc.allocate<EdgeEvent>(bestSplit.numRight * 2 * PointType::dim);
2179  } else {
2180  leftEventsStart = leftAlloc.allocate<EdgeEvent>(bestSplit.numLeft * 2 * PointType::dim);
2181  rightEventsStart = eventStart;
2182  }
2183 
2184  EdgeEvent *leftEventsEnd = leftEventsStart, *rightEventsEnd = rightEventsStart;
2185 
2186  AABBType leftNodeAABB = nodeAABB, rightNodeAABB = nodeAABB;
2187  leftNodeAABB.max[bestSplit.axis] = bestSplit.pos;
2188  rightNodeAABB.min[bestSplit.axis] = bestSplit.pos;
2189 
2190  SizeType prunedLeft = 0, prunedRight = 0;
2191 
2192  /* ==================================================================== */
2193  /* Partitioning */
2194  /* ==================================================================== */
2195 
2196  if (m_clip) {
2197  EdgeEvent
2198  *leftEventsTempStart = leftAlloc.allocate<EdgeEvent>(primsLeft * 2 * PointType::dim),
2199  *rightEventsTempStart = rightAlloc.allocate<EdgeEvent>(primsRight * 2 * PointType::dim),
2200  *newEventsLeftStart = leftAlloc.allocate<EdgeEvent>(primsBoth * 2 * PointType::dim),
2201  *newEventsRightStart = rightAlloc.allocate<EdgeEvent>(primsBoth * 2 * PointType::dim);
2202 
2203  EdgeEvent *leftEventsTempEnd = leftEventsTempStart,
2204  *rightEventsTempEnd = rightEventsTempStart,
2205  *newEventsLeftEnd = newEventsLeftStart,
2206  *newEventsRightEnd = newEventsRightStart;
2207 
2208  for (EdgeEvent *event = eventStart; event<eventEnd; ++event) {
2209  int classification = storage.get(event->index);
2210 
2211  if (classification == ELeftSide) {
2212  /* Left-only primitive. Move to the left list and advance */
2213  *leftEventsTempEnd++ = *event;
2214  } else if (classification == ERightSide) {
2215  /* Right-only primitive. Move to the right list and advance */
2216  *rightEventsTempEnd++ = *event;
2217  } else if (classification == EBothSides) {
2218  /* The primitive overlaps the split plane. Re-clip and
2219  generate new events for each side */
2220  const IndexType index = event->index;
2221 
2222  AABBType clippedLeft = cast()->getClippedAABB(index, leftNodeAABB);
2223  AABBType clippedRight = cast()->getClippedAABB(index, rightNodeAABB);
2224 
2225  KDAssert(leftNodeAABB.contains(clippedLeft));
2226  KDAssert(rightNodeAABB.contains(clippedRight));
2227 
2228  if (clippedLeft.isValid() && clippedLeft.getSurfaceArea() > 0) {
2229  for (int axis=0; axis<PointType::dim; ++axis) {
2230  float min = clippedLeft.min[axis],
2231  max = clippedLeft.max[axis];
2232 
2233  if (min == max) {
2234  *newEventsLeftEnd++ = EdgeEvent(
2235  EdgeEvent::EEdgePlanar,
2236  axis, min, index);
2237  } else {
2238  *newEventsLeftEnd++ = EdgeEvent(
2239  EdgeEvent::EEdgeStart,
2240  axis, min, index);
2241  *newEventsLeftEnd++ = EdgeEvent(
2242  EdgeEvent::EEdgeEnd,
2243  axis, max, index);
2244  }
2245  }
2246  } else {
      /* The clipped primitive vanished on this side -- drop it */
2247  prunedLeft++;
2248  }
2249 
2250  if (clippedRight.isValid() && clippedRight.getSurfaceArea() > 0) {
2251  for (int axis=0; axis<PointType::dim; ++axis) {
2252  float min = clippedRight.min[axis],
2253  max = clippedRight.max[axis];
2254 
2255  if (min == max) {
2256  *newEventsRightEnd++ = EdgeEvent(
2257  EdgeEvent::EEdgePlanar,
2258  axis, min, index);
2259  } else {
2260  *newEventsRightEnd++ = EdgeEvent(
2261  EdgeEvent::EEdgeStart,
2262  axis, min, index);
2263  *newEventsRightEnd++ = EdgeEvent(
2264  EdgeEvent::EEdgeEnd,
2265  axis, max, index);
2266  }
2267  }
2268  } else {
2269  prunedRight++;
2270  }
2271 
2272  /* Mark this primitive as processed so that clipping
2273  is only done once */
2274  storage.set(index, EBothSidesProcessed);
2275  }
2276  }
2277 
2278  KDAssert((SizeType) (leftEventsTempEnd - leftEventsTempStart) <= primsLeft * 2 * PointType::dim);
2279  KDAssert((SizeType) (rightEventsTempEnd - rightEventsTempStart) <= primsRight * 2 * PointType::dim);
2280  KDAssert((SizeType) (newEventsLeftEnd - newEventsLeftStart) <= primsBoth * 2 * PointType::dim);
2281  KDAssert((SizeType) (newEventsRightEnd - newEventsRightStart) <= primsBoth * 2 * PointType::dim);
2282  ctx.pruned += prunedLeft + prunedRight;
2283 
      /* The events copied from the parent list are already sorted; only the
         freshly generated ones must be sorted before the two runs can be
         merged (std::merge requires both inputs to be sorted) */
2284  /* Sort the events from overlapping prims */
2285  std::sort(newEventsLeftStart, newEventsLeftEnd, EdgeEventOrdering());
2286  std::sort(newEventsRightStart, newEventsRightEnd, EdgeEventOrdering());
2287 
2288  /* Merge the left list */
2289  leftEventsEnd = std::merge(leftEventsTempStart,
2290  leftEventsTempEnd, newEventsLeftStart, newEventsLeftEnd,
2291  leftEventsStart, EdgeEventOrdering());
2292 
2293  /* Merge the right list */
2294  rightEventsEnd = std::merge(rightEventsTempStart,
2295  rightEventsTempEnd, newEventsRightStart, newEventsRightEnd,
2296  rightEventsStart, EdgeEventOrdering());
2297 
2298  /* Release temporary memory */
2299  leftAlloc.release(newEventsLeftStart);
2300  leftAlloc.release(leftEventsTempStart);
2301  rightAlloc.release(newEventsRightStart);
2302  rightAlloc.release(rightEventsTempStart);
2303  } else {
      /* No clipping: events keep their positions, so the parent list stays
         sorted and can simply be filtered into the two child lists */
2304  for (EdgeEvent *event = eventStart; event < eventEnd; ++event) {
2305  int classification = storage.get(event->index);
2306 
2307  if (classification == ELeftSide) {
2308  /* Left-only primitive. Move to the left list and advance */
2309  *leftEventsEnd++ = *event;
2310  } else if (classification == ERightSide) {
2311  /* Right-only primitive. Move to the right list and advance */
2312  *rightEventsEnd++ = *event;
2313  } else if (classification == EBothSides) {
2314  /* The primitive overlaps the split plane. Its edge events
2315  must be added to both lists. */
2316  *leftEventsEnd++ = *event;
2317  *rightEventsEnd++ = *event;
2318  }
2319  }
2320  KDAssert((SizeType) (leftEventsEnd - leftEventsStart) <= bestSplit.numLeft * 2 * PointType::dim);
2321  KDAssert((SizeType) (rightEventsEnd - rightEventsStart) <= bestSplit.numRight * 2 * PointType::dim);
2322  }
2323 
2324  /* Shrink the edge event storage now that we know exactly how
2325  many are on each side */
2326  ctx.leftAlloc.shrinkAllocation(leftEventsStart,
2327  leftEventsEnd - leftEventsStart);
2328 
2329  ctx.rightAlloc.shrinkAllocation(rightEventsStart,
2330  rightEventsEnd - rightEventsStart);
2331 
2332  /* ==================================================================== */
2333  /* Recursion */
2334  /* ==================================================================== */
2335 
2336  KDNode *children = ctx.nodes.allocate(2);
2337 
      /* Snapshot the build state so the split can be undone ("retracted")
         if the final recursive cost turns out worse than a plain leaf */
2338  SizeType nodePosBeforeSplit = (SizeType) ctx.nodes.size();
2339  SizeType indexPosBeforeSplit = (SizeType) ctx.indices.size();
2340  SizeType leafNodeCountBeforeSplit = ctx.leafNodeCount;
2341  SizeType nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount;
2342  SizeType innerNodeCountBeforeSplit = ctx.innerNodeCount;
2343 
2344  if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) {
2345  LockGuard lock(m_indirectionLock);
2346  SizeType indirectionIdx = (SizeType) m_indirections.size();
2347  m_indirections.push_back(children);
2348  /* Unable to store relative offset -- create an indirection
2349  table entry */
2350  node->initIndirectionNode(bestSplit.axis, bestSplit.pos,
2351  indirectionIdx);
2352  }
2353  ctx.innerNodeCount++;
2354 
2355  Float leftCost = buildTree(ctx, depth+1, children,
2356  leftNodeAABB, leftEventsStart, leftEventsEnd,
2357  bestSplit.numLeft - prunedLeft, true, badRefines);
2358 
2359  Float rightCost = buildTree(ctx, depth+1, children+1,
2360  rightNodeAABB, rightEventsStart, rightEventsEnd,
2361  bestSplit.numRight - prunedRight, false, badRefines);
2362 
2363  std::pair<Float, Float> prob = tch(bestSplit.axis,
2364  bestSplit.pos - nodeAABB.min[bestSplit.axis],
2365  nodeAABB.max[bestSplit.axis] - bestSplit.pos);
2366 
2367  /* Compute the final cost given the updated cost
2368  values received from the children */
2369  Float finalCost = m_traversalCost +
2370  (prob.first * leftCost + prob.second * rightCost);
2371 
2372  /* Release the index lists not needed by the children anymore */
2373  if (isLeftChild)
2374  ctx.rightAlloc.release(rightEventsStart);
2375  else
2376  ctx.leftAlloc.release(leftEventsStart);
2377 
2378  /* ==================================================================== */
2379  /* Final decision */
2380  /* ==================================================================== */
2381 
2382  if (!m_retract || finalCost < primCount * m_queryCost) {
2383  return finalCost;
2384  } else {
2385  /* In the end, splitting didn't help to reduce the SAH cost.
2386  Tear up everything below this node and create a leaf */
2387  ctx.nodes.resize(nodePosBeforeSplit);
2388  ctx.retractedSplits++;
2389  ctx.leafNodeCount = leafNodeCountBeforeSplit;
2390  ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit;
2391  ctx.innerNodeCount = innerNodeCountBeforeSplit;
2392  createLeafAfterRetraction(ctx, node, indexPosBeforeSplit);
2393  return leafCost;
2394  }
2395 
      /* NOTE(review): unreachable -- both branches of the preceding
         if/else statement return */
2396  return bestSplit.cost;
2397  }
2398 
2399  /**
2400  * \brief Min-max binning as described in
2401  * "Highly Parallel Fast KD-tree Construction for Interactive
2402  * Ray Tracing of Dynamic Scenes"
2403  * by M. Shevtsov, A. Soupikov and A. Kapustin
2404  */
2405  struct MinMaxBins {
2406  MinMaxBins(SizeType nBins) : m_binCount(nBins) {
2407  m_minBins = new SizeType[m_binCount*PointType::dim];
2408  m_maxBins = new SizeType[m_binCount*PointType::dim];
2409  }
2410 
2412  delete[] m_minBins;
2413  delete[] m_maxBins;
2414  }
2415 
2416  /**
2417  * \brief Prepare to bin for the specified bounds
2418  */
2419  void setAABB(const AABBType &aabb) {
2420  for (int axis=0; axis<PointType::dim; ++axis) {
2421  float min = math::castflt_down(aabb.min[axis]);
2422  float max = math::castflt_up(aabb.max[axis]);
2423  m_min[axis] = min;
2424  m_aabb.min[axis] = min;
2425  m_aabb.max[axis] = max;
2426  m_binSize[axis] = (max-min) / m_binCount;
2427  m_invBinSize[axis] = 1 / m_binSize[axis];
2428  }
2429  }
2430 
2431  /// Compute the bin location for a given position and axis
2432  inline IndexType computeIndex(float pos, int axis) {
2433  return (IndexType) std::min((float) (m_binCount-1), std::max(0.0f, (pos - m_min[axis]) * m_invBinSize[axis]));
2434  }
2435 
2436  /**
2437  * \brief Run min-max binning
2438  *
2439  * \param derived Derived class to be used to determine the AABB for
2440  * a given list of primitives
2441  * \param indices Primitive indirection list
2442  * \param primCount Specifies the length of \a indices
2443  */
2444  void bin(const Derived *derived, IndexType *indices,
2445  SizeType primCount) {
2446  m_primCount = primCount;
2447  memset(m_minBins, 0, sizeof(SizeType) * PointType::dim * m_binCount);
2448  memset(m_maxBins, 0, sizeof(SizeType) * PointType::dim * m_binCount);
2449 
2450  for (SizeType i=0; i<m_primCount; ++i) {
2451  const AABBType aabb = derived->getAABB(indices[i]);
2452  for (int axis=0; axis<PointType::dim; ++axis) {
2453  m_minBins[axis * m_binCount + computeIndex(math::castflt_down(aabb.min[axis]), axis)]++;
2454  m_maxBins[axis * m_binCount + computeIndex(math::castflt_up (aabb.max[axis]), axis)]++;
2455  }
2456  }
2457  }
2458 
2459  /**
2460  * \brief Evaluate the tree construction heuristic at each bin boundary
2461  * and return the minimizer for the given cost constants. Min-max
2462  * binning uses no "empty space bonus" since it cannot create such
2463  * splits.
2464  */
2465  SplitCandidate minimizeCost(Float traversalCost, Float queryCost) {
2466  TreeConstructionHeuristic tch(m_aabb);
2467  SplitCandidate candidate;
2468  int binIdx = 0;
2469 
2470  for (int axis=0; axis<PointType::dim; ++axis) {
2471  SizeType numLeft = 0, numRight = m_primCount;
2472  Float leftWidth = 0, rightWidth = m_aabb.max[axis] - m_aabb.min[axis];
2473  const float binSize = m_binSize[axis];
2474 
2475  for (int i=0; i<m_binCount-1; ++i) {
2476  numLeft += m_minBins[binIdx];
2477  numRight -= m_maxBins[binIdx];
2478  leftWidth += binSize;
2479  rightWidth -= binSize;
2480  std::pair<Float, Float> prob =
2481  tch(axis, leftWidth, rightWidth);
2482 
2483  Float cost = traversalCost + queryCost
2484  * (prob.first * numLeft + prob.second * numRight);
2485 
2486  if (cost < candidate.cost) {
2487  candidate.cost = cost;
2488  candidate.axis = axis;
2489  candidate.numLeft = numLeft;
2490  candidate.numRight = numRight;
2491  candidate.leftBin = i;
2492  }
2493 
2494  binIdx++;
2495  }
2496  binIdx++;
2497  }
2498 
2499  KDAssert(candidate.cost != std::numeric_limits<Float>::infinity() &&
2500  candidate.leftBin >= 0);
2501 
2502  return candidate;
2503  }
2504 
2505  struct Partition {
2506  AABBType left;
2508  AABBType right;
2510 
2511  Partition(const AABBType &left, IndexType *leftIndices,
2512  const AABBType &right, IndexType *rightIndices) :
2513  left(left), leftIndices(leftIndices), right(right),
2514  rightIndices(rightIndices) { }
2515  };
2516 
2517  /**
2518  * \brief Given a suitable split candiate, compute tight bounding
2519  * boxes for the left and right subtrees and return associated
2520  * primitive lists.
2521  */
2523  BuildContext &ctx, const Derived *derived, IndexType *primIndices,
2524  SplitCandidate &split, bool isLeftChild, Float traversalCost,
2525  Float queryCost) {
2526  SizeType numLeft = 0, numRight = 0;
2527  AABBType leftBounds, rightBounds;
2528  const int axis = split.axis;
2529 
2530  IndexType *leftIndices, *rightIndices;
2531  if (isLeftChild) {
2532  OrderedChunkAllocator &rightAlloc = ctx.rightAlloc;
2533  leftIndices = primIndices;
2534  rightIndices = rightAlloc.allocate<IndexType>(split.numRight);
2535  } else {
2536  OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
2537  leftIndices = leftAlloc.allocate<IndexType>(split.numLeft);
2538  rightIndices = primIndices;
2539  }
2540 
2541  for (SizeType i=0; i<m_primCount; ++i) {
2542  const IndexType primIndex = primIndices[i];
2543  const AABBType aabb = derived->getAABB(primIndex);
2544  int startIdx = computeIndex(math::castflt_down(aabb.min[axis]), axis);
2545  int endIdx = computeIndex(math::castflt_up (aabb.max[axis]), axis);
2546 
2547  if (endIdx <= split.leftBin) {
2548  KDAssert(numLeft < split.numLeft);
2549  leftBounds.expandBy(aabb);
2550  leftIndices[numLeft++] = primIndex;
2551  } else if (startIdx > split.leftBin) {
2552  KDAssert(numRight < split.numRight);
2553  rightBounds.expandBy(aabb);
2554  rightIndices[numRight++] = primIndex;
2555  } else {
2556  leftBounds.expandBy(aabb);
2557  rightBounds.expandBy(aabb);
2558  KDAssert(numLeft < split.numLeft);
2559  KDAssert(numRight < split.numRight);
2560  leftIndices[numLeft++] = primIndex;
2561  rightIndices[numRight++] = primIndex;
2562  }
2563  }
2564  leftBounds.clip(m_aabb);
2565  rightBounds.clip(m_aabb);
2566  split.pos = m_min[axis] + m_binSize[axis] * (split.leftBin + 1);
2567  leftBounds.max[axis] = std::min(leftBounds.max[axis], (Float) split.pos);
2568  rightBounds.min[axis] = std::max(rightBounds.min[axis], (Float) split.pos);
2569 
2570  KDAssert(numLeft == split.numLeft);
2571  KDAssert(numRight == split.numRight);
2572 
2573  /// Release the unused memory regions
2574  if (isLeftChild)
2575  ctx.leftAlloc.shrinkAllocation(leftIndices, split.numLeft);
2576  else
2577  ctx.rightAlloc.shrinkAllocation(rightIndices, split.numRight);
2578 
2579  return Partition(leftBounds, leftIndices,
2580  rightBounds, rightIndices);
2581  }
2582  private:
2583  SizeType *m_minBins;
2584  SizeType *m_maxBins;
2585  SizeType m_primCount;
2586  int m_binCount;
2587  float m_min[PointType::dim];
2588  float m_binSize[PointType::dim];
2589  float m_invBinSize[PointType::dim];
2590  AABBType m_aabb;
2591  };
2592 
2593 protected:
2598  bool m_clip, m_retract, m_parallelBuild;
2606  std::vector<TreeBuilder *> m_builders;
2607  std::vector<KDNode *> m_indirections;
2609  BuildInterface m_interface;
2610 };
2611 
2612 #if defined(_MSC_VER)
2613 /* Revert back to fast / non-strict IEEE 754
2614  floating point computations */
2616 #pragma float_control(precise, off)
2618 #endif
2619 
2620 template <typename AABBType>
2622  = new Class("KDTreeBase", true, "Object");
2623 
2624 template <typename AABBType>
2626  return m_theClass;
2627 }
2628 
2629 
2630 // Explicit instantiation to avoid linking error outside the library
2631 #ifdef _MSC_VER
2632 template class MTS_EXPORT_RENDER KDTreeBase<AABB>;
2633 #endif
2634 
2636 
2637 #endif /* __MITSUBA_RENDER_GKDTREE_H_ */
IndexType * m_indices
Definition: gkdtree.h:2594
#define KDAssertEx(expr, text)
Definition: gkdtree.h:57
IndexType computeIndex(float pos, int axis)
Compute the bin location for a given position and axis.
Definition: gkdtree.h:2432
EMask
Definition: gkdtree.h:482
~OrderedChunkAllocator()
Definition: gkdtree.h:86
SizeType getMaxDepth() const
Return maximum tree depth (0 = use heuristic)
Definition: gkdtree.h:835
OrderedChunkAllocator leftAlloc
Definition: gkdtree.h:1380
T & operator[](size_t index)
Definition: gkdtree.h:322
#define KDLog(level, fmt,...)
Definition: gkdtree.h:642
Data type for split candidates computed by the O(n log n) greedy optimization method.
Definition: gkdtree.h:1347
MTS_EXPORT_CORE std::string formatString(const char *pFmt,...)
Wrapped snprintf.
size_t getChunkCount() const
Definition: gkdtree.h:201
SizeType getMinMaxBins() const
Return the number of bins used for Min-Max binning.
Definition: gkdtree.h:828
Parent::SizeType SizeType
Definition: gkdtree.h:715
Communication data structure used to pass jobs to kd-tree builder threads.
Definition: gkdtree.h:1441
ref< ConditionVariable > condJobTaken
Definition: gkdtree.h:1444
void resize(size_t pos)
Resize the vector to the given size.
Definition: gkdtree.h:361
size_t used() const
Return the total amount of used memory in bytes.
Definition: gkdtree.h:217
std::string toString() const
Definition: gkdtree.h:1360
void createLeaf(BuildContext &ctx, KDNode *node, EdgeEvent *eventStart, EdgeEvent *eventEnd, SizeType primCount)
Leaf node creation helper function.
Definition: gkdtree.h:1607
EventList(EdgeEvent *start, EdgeEvent *end, SizeType primCount)
Definition: gkdtree.h:1541
Describes the beginning or end of a primitive when projected onto a certain dimension.
Definition: gkdtree.h:1282
FINLINE const KDNode *__restrict getLeft() const
Return the left child (assuming that this is an interior node)
Definition: gkdtree.h:551
Float getQueryCost() const
Return the query cost used by the tree construction heuristic (This is the average cost for testing a...
Definition: gkdtree.h:791
const T & operator[](size_t index) const
Definition: gkdtree.h:327
std::string toString() const
Return a string representation of the chunks.
Definition: gkdtree.h:228
float castflt_up(float val)
Cast to single precision and round up if not exactly representable (passthrough)
Definition: math.h:281
void setMaxBadRefines(SizeType maxBadRefines)
Set the number of bad refines allowed to happen in succession before a leaf node will be created...
Definition: gkdtree.h:873
void initLeafNode(unsigned int offset, unsigned int numPrims)
Initialize a leaf kd-Tree node.
Definition: gkdtree.h:492
KDNode * m_nodes
Definition: gkdtree.h:629
bool planarLeft
Definition: gkdtree.h:1352
BuildContext & getContext()
Definition: gkdtree.h:1516
Partition(const AABBType &left, IndexType *leftIndices, const AABBType &right, IndexType *rightIndices)
Definition: gkdtree.h:2511
int get(uint32_t index) const
Definition: gkdtree.h:421
SizeType m_stopPrims
Definition: gkdtree.h:2600
ClassificationStorage classStorage
Definition: gkdtree.h:1383
uint32_t combined
Definition: gkdtree.h:463
ELogLevel m_logLevel
Definition: gkdtree.h:630
FINLINE IndexType getPrimStart() const
Assuming this is a leaf node, return the first primitive index.
Definition: gkdtree.h:536
BlockedVector()
Definition: gkdtree.h:273
SizeType primIndexCount
Definition: gkdtree.h:1389
EClassificationResult
Primitive classification during tree-construction.
Definition: gkdtree.h:1267
Condition variable synchronization primitive. Can be used to wait for a condition to become true in a...
Definition: lock.h:106
float pos
Definition: gkdtree.h:1349
bool isBuilt() const
Return whether or not the kd-tree has been built.
Definition: gkdtree.h:615
BuildContext(SizeType primCount, SizeType binCount)
Definition: gkdtree.h:1393
void setAABB(const AABBType &aabb)
Prepare to bin for the specified bounds.
Definition: gkdtree.h:2419
Compact storage for primitive classification.
Definition: gkdtree.h:394
SizeType leafNodeCount
Definition: gkdtree.h:1386
void createLeafAfterRetraction(BuildContext &ctx, KDNode *node, SizeType start)
Leaf node creation helper function.
Definition: gkdtree.h:1665
Float m_queryCost
Definition: gkdtree.h:2596
KDNode * node
Definition: gkdtree.h:1450
void setQueryCost(Float queryCost)
Set the query cost used by the tree construction heuristic (This is the average cost for testing a co...
Definition: gkdtree.h:782
OrderedChunkAllocator rightAlloc
Definition: gkdtree.h:1380
void push_back(const T &value)
Append an element to the end.
Definition: gkdtree.h:282
void setMinMaxBins(SizeType minMaxBins)
Set the number of bins used for Min-Max binning.
Definition: gkdtree.h:821
#define KDAssert(expr)
Definition: gkdtree.h:56
bool getClip() const
Return whether or not to use primitive clipping will be used in the tree construction.
Definition: gkdtree.h:851
unsigned int type
Event type: end/planar/start.
Definition: gkdtree.h:1323
const KDNode * getRoot() const
Return the root node of the kd-tree.
Definition: gkdtree.h:610
Min-max binning as described in "Highly Parallel Fast KD-tree Construction for Interactive Ray Tracing of Dynamic Scenes"...
Definition: gkdtree.h:2405
size_t size() const
Return the total amount of chunk memory in bytes.
Definition: gkdtree.h:206
void createLeaf(BuildContext &ctx, KDNode *node, SizeType *indices, SizeType primCount)
Leaf node creation helper function.
Definition: gkdtree.h:1640
ELogLevel getLogLevel() const
Return the log level of kd-tree status messages.
Definition: gkdtree.h:604
Float m_emptySpaceBonus
Definition: gkdtree.h:2597
#define SLog(level, fmt,...)
Write a Log message to the console (static version - to be used outside of classes that derive from O...
Definition: logger.h:49
Debug message, usually turned off.
Definition: formatter.h:30
std::map< const KDNode *, IndexType > threadMap
Definition: gkdtree.h:1445
Base class of all kd-trees.
Definition: gkdtree.h:441
void initIndirectionNode(int axis, float split, uint32_t indirectionEntry)
Initialize an interior indirection node.
Definition: gkdtree.h:517
EventList createEventList(OrderedChunkAllocator &alloc, const AABBType &nodeAABB, IndexType *prims, SizeType primCount)
Create an edge event list for a given list of primitives.
Definition: gkdtree.h:1551
float pos
Plane position.
Definition: gkdtree.h:1319
SizeType getStopPrims() const
Return the number of primitives, at which recursion will stop when building the tree.
Definition: gkdtree.h:897
#define MTS_KD_AABB_EPSILON
To avoid numerical issues, the size of the scene bounding box is increased by this amount...
Definition: gkdtree.h:50
SizeType m_exactPrimThreshold
Definition: gkdtree.h:2602
size_t blockCount() const
Return the number of allocated blocks.
Definition: gkdtree.h:343
SizeType m_maxBadRefines
Definition: gkdtree.h:2601
void setClip(bool clip)
Specify whether or not to use primitive clipping will be used in the tree construction.
Definition: gkdtree.h:843
SizeType nonemptyLeafNodeCount
Definition: gkdtree.h:1387
AABBType aabb
Definition: gkdtree.h:946
Float getEmptySpaceBonus() const
Return the bonus factor for empty space used by the tree construction heuristic.
Definition: gkdtree.h:807
const GenericKDTree::BuildContext * context
Definition: gkdtree.h:945
SizeType getExactPrimitiveThreshold() const
Return the number of primitives, at which the builder will switch from (approximate) Min-Max binning ...
Definition: gkdtree.h:931
void setTraversalCost(Float traversalCost)
Set the traversal cost used by the tree construction heuristic.
Definition: gkdtree.h:761
#define MTS_NAMESPACE_BEGIN
Definition: platform.h:137
bool operator()(const EdgeEvent &a, const EdgeEvent &b) const
Definition: gkdtree.h:1332
uint32_t SizeType
Size number format.
Definition: gkdtree.h:447
#define MTS_KD_MAXDEPTH
Activate lots of extra checks.
Definition: gkdtree.h:37
void unlock()
Definition: lock.h:211
IndexType * getIndices() const
Returns the underlying kd-tree index buffer.
Definition: gkdtree.h:768
SizeType primCount
Definition: gkdtree.h:1539
MinMaxBins minMaxBins
Definition: gkdtree.h:1384
SizeType retractedSplits
Definition: gkdtree.h:1390
void printStats(ELogLevel level)
Definition: gkdtree.h:1411
void accumulateStatisticsFrom(const BuildContext &ctx)
Definition: gkdtree.h:1427
~BlockedVector()
Definition: gkdtree.h:275
More relevant debug / information message.
Definition: formatter.h:31
std::string toString() const
Return a string representation.
Definition: gkdtree.h:1298
Simple RAII-style locking of a Mutex. On construction it locks the mutex and unlocks it on destructio...
Definition: lock.h:170
SizeType numLeft
Definition: gkdtree.h:1351
SizeType numRight
Definition: gkdtree.h:1351
size_t size() const
Return the currently used number of items.
Definition: gkdtree.h:336
int depth
Definition: gkdtree.h:1449
unsigned int axis
Event axis.
Definition: gkdtree.h:1325
void shrinkAllocation(T *ptr, size_t newSize)
Shrink the size of the last allocated chunk.
Definition: gkdtree.h:175
~ClassificationStorage()
Definition: gkdtree.h:399
AABBType m_tightAABB
Definition: gkdtree.h:631
EdgeEvent * eventStart
Definition: gkdtree.h:1452
bool initInnerNode(int axis, float split, ptrdiff_t relOffset)
Definition: gkdtree.h:501
void setPrimitiveCount(size_t size)
Definition: gkdtree.h:404
KD-tree node in 8 bytes.
Definition: gkdtree.h:452
SplitCandidate()
Definition: gkdtree.h:1355
FINLINE int getAxis() const
Return the split axis (assuming that this is an interior node)
Definition: gkdtree.h:581
void setStopPrims(SizeType stopPrims)
Set the number of primitives, at which recursion will stop when building the tree.
Definition: gkdtree.h:889
std::vector< TreeBuilder * > m_builders
Definition: gkdtree.h:2606
OrderedChunkAllocator(size_t minAllocation=512 *1024)
Definition: gkdtree.h:81
MinMaxBins(SizeType nBins)
Definition: gkdtree.h:2406
SizeType m_nodeCount
Definition: gkdtree.h:2604
void forget()
Forget about all chunks without actually freeing them. This is useful when the chunks have been merge...
Definition: gkdtree.h:114
T *__restrict allocate(size_t size)
Allocate a certain number of elements and return a pointer to the first one.
Definition: gkdtree.h:300
ref< Mutex > mutex
Definition: gkdtree.h:1443
Once the tree has been constructed, it is rewritten into a more convenient binary storage format...
Definition: gkdtree.h:942
BlockedVector< IndexType,(512 *1024/sizeof(uint32_t)) > indices
Definition: gkdtree.h:1382
BlockedVector< KDNode,(512 *1024/sizeof(KDNode)) > nodes
Definition: gkdtree.h:1381
MTS_EXPORT_CORE void *__restrict allocAligned(size_t size)
Allocate an aligned region of memory.
BuildInterface m_interface
Definition: gkdtree.h:2609
void set(uint32_t index, int value)
Definition: gkdtree.h:415
GenericKDTree()
Create a new kd-tree instance initialized with the default parameters.
Definition: gkdtree.h:732
void clear()
Release all memory.
Definition: gkdtree.h:371
TreeBuilder(IndexType id, GenericKDTree *parent)
Definition: gkdtree.h:1470
void merge(const OrderedChunkAllocator &other)
Merge the chunks of another allocator into this one.
Definition: gkdtree.h:103
void setParallelBuild(bool parallel)
Specify whether or not tree construction should run in parallel.
Definition: gkdtree.h:905
IndexType * rightIndices
Definition: gkdtree.h:2509
MTS_EXPORT_CORE int getCoreCount()
Determine the number of available CPU cores.
SizeType m_maxDepth
Definition: gkdtree.h:2599
int badRefines
Definition: gkdtree.h:1454
KDNode * target
Definition: gkdtree.h:944
uint32_t IndexType
Index number format (max 2^32 prims)
Definition: gkdtree.h:444
T *__restrict allocate(size_t size)
Request a block of memory from the allocator.
Definition: gkdtree.h:124
ClassificationStorage()
Definition: gkdtree.h:396
Per-thread context used to manage memory allocations, also records some useful statistics.
Definition: gkdtree.h:1379
kd-tree builder thread
Definition: gkdtree.h:1468
void setLogLevel(ELogLevel level)
Return the log level of kd-tree status messages.
Definition: gkdtree.h:607
size_t size() const
Definition: gkdtree.h:427
const KDNode * node
Definition: gkdtree.h:943
#define SAssert(cond)
``Static&#39;&#39; assertion (to be used outside of classes that derive from Object)
Definition: logger.h:79
float split
Split plane coordinate.
Definition: gkdtree.h:466
FINLINE const KDNode *__restrict getRight() const
Return the left child (assuming that this is an interior node)
Definition: gkdtree.h:568
#define MTS_DECLARE_CLASS()
This macro must be used in the initial definition in classes that derive from Object.
Definition: class.h:158
Optimized KD-tree acceleration data structure for n-dimensional (n&lt;=4) shapes and various queries on ...
Definition: gkdtree.h:706
void setMaxDepth(SizeType maxDepth)
Set the maximum tree depth (0 = use heuristic)
Definition: gkdtree.h:814
AABBType::VectorType VectorType
Definition: gkdtree.h:720
#define SIZE_T_FMT
Definition: platform.h:92
Reference counting helper.
Definition: ref.h:40
Warning message.
Definition: formatter.h:32
AABBType right
Definition: gkdtree.h:2508
Parent::KDNode KDNode
Definition: gkdtree.h:717
AABBType left
Definition: gkdtree.h:2506
FINLINE float getSplit() const
Return the split plane location (assuming that this is an interior node)
Definition: gkdtree.h:576
Stores meta-information about Object instances.
Definition: class.h:43
Basic vector implementation, which stores all data in a list of fixed-sized blocks.
Definition: gkdtree.h:271
RewriteItem(const KDNode *node, KDNode *target, const typename GenericKDTree::BuildContext *context, const AABBType &aabb)
Definition: gkdtree.h:948
BuildInterface()
Definition: gkdtree.h:1456
Cross-platform thread implementation.
Definition: thread.h:34
KDTreeBase< AABBType > Parent
Definition: gkdtree.h:711
MTS_EXPORT_CORE std::string memString(size_t size, bool precise=false)
Turn a memory size into a human-readable string.
SizeType innerNodeCount
Definition: gkdtree.h:1388
Platform independent milli/micro/nanosecond timerThis class implements a simple cross-platform timer ...
Definition: timer.h:37
ELogLevel
Available Log message types.
Definition: formatter.h:28
FINLINE IndexType getIndirectionIndex() const
Return the index of an indirection node.
Definition: gkdtree.h:546
Special &quot;ordered&quot; memory allocator.
Definition: gkdtree.h:79
Parent::IndexType IndexType
Definition: gkdtree.h:716
Error message, causes an exception to be thrown.
Definition: formatter.h:33
Float transitionToNLogN(BuildContext &ctx, unsigned int depth, KDNode *node, const AABBType &nodeAABB, IndexType *indices, SizeType primCount, bool isLeftChild, SizeType badRefines)
Implements the transition from min-max-binning to the O(n log n) optimization.
Definition: gkdtree.h:1729
EdgeEvent(uint16_t type, int axis, float pos, IndexType index)
Create a new edge event.
Definition: gkdtree.h:1294
IndexType index
Primitive index.
Definition: gkdtree.h:1321
Float getTraversalCost() const
Return the traversal cost used by the tree construction heuristic.
Definition: gkdtree.h:773
void bin(const Derived *derived, IndexType *indices, SizeType primCount)
Run min-max binning.
Definition: gkdtree.h:2444
Edge event comparison functor.
Definition: gkdtree.h:1331
void setRetract(bool retract)
Specify whether or not bad splits can be &quot;retracted&quot;.
Definition: gkdtree.h:858
FINLINE bool isIndirection() const
Is this an indirection node?
Definition: gkdtree.h:531
ref< Mutex > m_indirectionLock
Definition: gkdtree.h:2608
virtual ~GenericKDTree()
Release all memory.
Definition: gkdtree.h:751
void release(T *ptr)
Definition: gkdtree.h:149
AABBType::PointType PointType
Definition: gkdtree.h:719
std::vector< KDNode * > m_indirections
Definition: gkdtree.h:2607
EdgeEvent * start
Definition: gkdtree.h:1538
std::string toString() const
Return a string representation.
Definition: gkdtree.h:586
bool getParallelBuild() const
Return whether or not tree construction will run in parallel.
Definition: gkdtree.h:913
FINLINE bool isLeaf() const
Is this a leaf node?
Definition: gkdtree.h:526
void cleanup()
Release all memory used by the allocator.
Definition: gkdtree.h:93
size_t capacity() const
Return the total capacity.
Definition: gkdtree.h:350
SizeType m_minMaxBins
Definition: gkdtree.h:2603
uint32_t end
End offset of the primitive list.
Definition: gkdtree.h:478
MTS_EXPORT_CORE void freeAligned(void *ptr)
Free an aligned region of memory.
Definition: gkdtree.h:1537
const AABBType & getAABB() const
Return a (slightly enlarged) axis-aligned bounding box containing all primitives. ...
Definition: gkdtree.h:620
EEventType
Possible event types.
Definition: gkdtree.h:1284
int leftBin
Definition: gkdtree.h:1353
SizeType getMaxBadRefines() const
Return the number of bad refines allowed to happen in succession before a leaf node will be created...
Definition: gkdtree.h:881
const AABBType & getTightAABB() const
Return a tight axis-aligned bounding box containing all primitives.
Definition: gkdtree.h:623
void run()
The thread&#39;s run method.
Definition: gkdtree.h:1485
IndexType * leftIndices
Definition: gkdtree.h:2507
size_t size()
Definition: gkdtree.h:1404
Float cost
Definition: gkdtree.h:1348
Parent of all Mitsuba classes.
Definition: object.h:38
#define MTS_EXPORT_RENDER
Definition: platform.h:109
SizeType primCount
Definition: gkdtree.h:1453
float castflt_down(float val)
Cast to single precision and round down if not exactly representable (passthrough) ...
Definition: math.h:297
MTS_EXPORT_CORE int log2i(uint32_t value)
Base-2 logarithm (32-bit integer version)
void buildInternal()
Build a KD-tree over the supplied geometry.
Definition: gkdtree.h:958
#define MTS_KD_MIN_ALLOC
OrderedChunkAllocator: don&#39;t create chunks smaller than 512 KiB.
Definition: gkdtree.h:40
bool m_retract
Definition: gkdtree.h:2598
AABBType nodeAABB
Definition: gkdtree.h:1451
Derived * cast()
Cast to the derived class.
Definition: gkdtree.h:1528
const Derived * cast() const
Cast to the derived class (const version)
Definition: gkdtree.h:1533
Partition partition(BuildContext &ctx, const Derived *derived, IndexType *primIndices, SplitCandidate &split, bool isLeftChild, Float traversalCost, Float queryCost)
Given a suitable split candiate, compute tight bounding boxes for the left and right subtrees and ret...
Definition: gkdtree.h:2522
FINLINE const KDNode *__restrict getSibling() const
Return the sibling of the current node.
Definition: gkdtree.h:557
AABBType::Scalar Scalar
Definition: gkdtree.h:718
bool getRetract() const
Return whether or not bad splits can be &quot;retracted&quot;.
Definition: gkdtree.h:865
SplitCandidate minimizeCost(Float traversalCost, Float queryCost)
Evaluate the tree construction heuristic at each bin boundary and return the minimizer for the given ...
Definition: gkdtree.h:2465
Float buildTree(BuildContext &ctx, unsigned int depth, KDNode *node, const AABBType &nodeAABB, EdgeEvent *eventStart, EdgeEvent *eventEnd, SizeType primCount, bool isLeftChild, SizeType badRefines)
Definition: gkdtree.h:1954
void setExactPrimitiveThreshold(SizeType exactPrimThreshold)
Specify the number of primitives, at which the builder will switch from (approximate) Min-Max binning...
Definition: gkdtree.h:922
SizeType m_indexCount
Definition: gkdtree.h:2605
Float m_traversalCost
Definition: gkdtree.h:2595
#define MTS_NAMESPACE_END
Definition: platform.h:138
Thin wrapper around the recursive boost thread lock.
Definition: lock.h:34
In addition to providing RAII-style locking, UniqueLock also allows for deferred locking until lock()...
Definition: lock.h:192
SizeType pruned
Definition: gkdtree.h:1391
bool done
Definition: gkdtree.h:1446
FINLINE IndexType getPrimEnd() const
Assuming this is a leaf node, return the last primitive index.
Definition: gkdtree.h:541
FINLINE KDNode *__restrict getLeft()
Return the left child (assuming that this is an interior node)
Definition: gkdtree.h:562
int axis
Definition: gkdtree.h:1350
Float buildTreeMinMax(BuildContext &ctx, unsigned int depth, KDNode *node, const AABBType &nodeAABB, const AABBType &tightAABB, IndexType *indices, SizeType primCount, bool isLeftChild, SizeType badRefines)
Build helper function (min-max binning)
Definition: gkdtree.h:1792
~TreeBuilder()
Definition: gkdtree.h:1480
~MinMaxBins()
Definition: gkdtree.h:2411
void setEmptySpaceBonus(Float emptySpaceBonus)
Set the bonus factor for empty space used by the tree construction heuristic.
Definition: gkdtree.h:799
EdgeEvent()
Dummy constructor.
Definition: gkdtree.h:1291