DHART
Loading...
Searching...
No Matches
nanort.h
Go to the documentation of this file.
1//
2// NanoRT, single header only modern ray tracing kernel.
3//
4
5//
6// Notes : The number of primitives are up to 2G. If you want to render large
7// data, please split data into chunks(~ 2G prims) and use NanoSG scene graph
8// library(`${nanort}/examples/nanosg`).
9//
10
11/*
12The MIT License (MIT)
13
14Copyright (c) 2015 - 2019 Light Transport Entertainment, Inc.
15
16Permission is hereby granted, free of charge, to any person obtaining a copy
17of this software and associated documentation files (the "Software"), to deal
18in the Software without restriction, including without limitation the rights
19to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20copies of the Software, and to permit persons to whom the Software is
21furnished to do so, subject to the following conditions:
22
23The above copyright notice and this permission notice shall be included in
24all copies or substantial portions of the Software.
25
26THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32THE SOFTWARE.
33*/
34
35#pragma once
36
37#ifndef NANORT_H_
38#define NANORT_H_
39
40#undef min
41#undef max
42
43#include <algorithm>
44#include <cassert>
45#include <cmath>
46#include <cstdio>
47#include <cstdlib>
48#include <cstring>
49#include <functional>
50#include <limits>
51#include <memory>
52#include <queue>
53#include <string>
54#include <vector>
55
56
57
58// compiler macros
59//
60// NANORT_USE_CPP11_FEATURE : Enable C++11 feature
61// NANORT_ENABLE_PARALLEL_BUILD : Enable parallel BVH build.
62// NANORT_ENABLE_SERIALIZATION : Enable serialization feature for built BVH.
63//
64// Parallelized BVH build is supported on C++11 thread version.
65// OpenMP version is not fully tested.
66// thus turn off if you face a problem when building BVH in parallel.
67// #define NANORT_ENABLE_PARALLEL_BUILD
68
69// Some constants
70#define kNANORT_MAX_STACK_DEPTH (512)
71#define kNANORT_MIN_PRIMITIVES_FOR_PARALLEL_BUILD (1024 * 8)
72#define kNANORT_SHALLOW_DEPTH (4) // will create 2**N subtrees
73
74#ifdef NANORT_USE_CPP11_FEATURE
75// Assume C++11 compiler has thread support.
76// In some situation (e.g. embedded system, JIT compilation), thread feature
77// may not be available though...
78#include <atomic>
79#include <mutex>
80#include <thread>
81
82#define kNANORT_MAX_THREADS (256)
83
84// Parallel build should work well for C++11 version, thus force enable it.
85#ifndef NANORT_ENABLE_PARALLEL_BUILD
86#define NANORT_ENABLE_PARALLEL_BUILD
87#endif
88
89#endif
90
91namespace nanort {
92
93 // RayType
94 typedef enum {
102
103#ifdef __clang__
104#pragma clang diagnostic push
105#if __has_warning("-Wzero-as-null-pointer-constant")
106#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
107#endif
108#endif
109
110 // ----------------------------------------------------------------------------
111 // Small vector class useful for multi-threaded environment.
112 //
113 // stack_container.h
114 //
115 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
116 // Use of this source code is governed by a BSD-style license that can be
117 // found in the LICENSE file.
118
119 // This allocator can be used with STL containers to provide a stack buffer
120 // from which to allocate memory and overflows onto the heap. This stack buffer
121 // would be allocated on the stack and allows us to avoid heap operations in
122 // some situations.
123 //
124 // STL likes to make copies of allocators, so the allocator itself can't hold
125 // the data. Instead, we make the creator responsible for creating a
126 // StackAllocator::Source which contains the data. Copying the allocator
127 // merely copies the pointer to this shared source, so all allocators created
128 // based on our allocator will share the same stack buffer.
129 //
130 // This stack buffer implementation is very simple. The first allocation that
131 // fits in the stack buffer will use the stack buffer. Any subsequent
132 // allocations will not use the stack buffer, even if there is unused room.
133 // This makes it appropriate for array-like containers, but the caller should
134 // be sure to reserve() in the container up to the stack buffer size. Otherwise
135 // the container will allocate a small array which will "use up" the stack
136 // buffer.
137 template <typename T, size_t stack_capacity>
138 class StackAllocator : public std::allocator<T> {
139 public:
140 typedef typename std::allocator<T>::pointer pointer;
141 typedef typename std::allocator<T>::size_type size_type;
142
143 // Backing store for the allocator. The container owner is responsible for
144 // maintaining this for as long as any containers using this allocator are
145 // live.
146 struct Source {
148
149 // Casts the buffer in its right type.
150 T* stack_buffer() { return reinterpret_cast<T*>(stack_buffer_); }
151 const T* stack_buffer() const {
152 return reinterpret_cast<const T*>(stack_buffer_);
153 }
154
155 //
156 // IMPORTANT: Take care to ensure that stack_buffer_ is aligned
157 // since it is used to mimic an array of T.
158 // Be careful while declaring any unaligned types (like bool)
159 // before stack_buffer_.
160 //
161
162 // The buffer itself. It is not of type T because we don't want the
163 // constructors and destructors to be automatically called. Define a POD
164 // buffer of the right size instead.
165 char stack_buffer_[sizeof(T[stack_capacity])];
166
167 // Set when the stack buffer is used for an allocation. We do not track
168 // how much of the buffer is used, only that somebody is using it.
170 };
171
172 // Used by containers when they want to refer to an allocator of type U.
173 template <typename U>
174 struct rebind {
176 };
177
178 // For the straight up copy c-tor, we can share storage.
180 : source_(rhs.source_) {}
181
182 // ISO C++ requires the following constructor to be defined,
183 // and std::vector in VC++2008SP1 Release fails with an error
184 // in the class _Container_base_aux_alloc_real (from <xutility>)
185 // if the constructor does not exist.
186 // For this constructor, we cannot share storage; there's
187 // no guarantee that the Source buffer of Ts is large enough
188 // for Us.
189 // TODO(Google): If we were fancy pants, perhaps we could share storage
190 // iff sizeof(T) == sizeof(U).
191 template <typename U, size_t other_capacity>
193 : source_(NULL) {
194 (void)other;
195 }
196
197 explicit StackAllocator(Source* source) : source_(source) {}
198
199 // Actually do the allocation. Use the stack buffer if nobody has used it yet
200 // and the size requested fits. Otherwise, fall through to the standard
201 // allocator.
202 pointer allocate(size_type n, void* hint = 0) {
203 if (source_ != NULL && !source_->used_stack_buffer_ &&
204 n <= stack_capacity) {
206 return source_->stack_buffer();
207 }
208 else {
209 return std::allocator<T>::allocate(n, hint);
210 }
211 }
212
213 // Free: when trying to free the stack buffer, just mark it as free. For
214 // non-stack-buffer pointers, just fall though to the standard allocator.
216 if (source_ != NULL && p == source_->stack_buffer())
218 else
219 std::allocator<T>::deallocate(p, n);
220 }
221
222 private:
224 };
225
226 // A wrapper around STL containers that maintains a stack-sized buffer that the
227 // initial capacity of the vector is based on. Growing the container beyond the
228 // stack capacity will transparently overflow onto the heap. The container must
229 // support reserve().
230 //
231 // WATCH OUT: the ContainerType MUST use the proper StackAllocator for this
232 // type. This object is really intended to be used only internally. You'll want
233 // to use the wrappers below for different types.
234 template <typename TContainerType, int stack_capacity>
236 public:
237 typedef TContainerType ContainerType;
238 typedef typename ContainerType::value_type ContainedType;
240
241 // Allocator must be constructed before the container!
243 // Make the container use the stack allocation by reserving our buffer size
244 // before doing anything else.
245 container_.reserve(stack_capacity);
246 }
247
248 // Getters for the actual container.
249 //
250 // Danger: any copies of this made using the copy constructor must have
251 // shorter lifetimes than the source. The copy will share the same allocator
252 // and therefore the same stack buffer as the original. Use std::copy to
253 // copy into a "real" container for longer-lived objects.
255 const ContainerType& container() const { return container_; }
256
257 // Support operator-> to get to the container. This allows nicer syntax like:
258 // StackContainer<...> foo;
259 // std::sort(foo->begin(), foo->end());
261 const ContainerType* operator->() const { return &container_; }
262
263#ifdef UNIT_TEST
264 // Retrieves the stack source so that that unit tests can verify that the
265 // buffer is being used properly.
266 const typename Allocator::Source& stack_data() const { return stack_data_; }
267#endif
268
269 protected:
270 typename Allocator::Source stack_data_;
271 unsigned char pad_[7];
274
275 // DISALLOW_EVIL_CONSTRUCTORS(StackContainer);
278 };
279
280 // StackVector
281 //
282 // Example:
283 // StackVector<int, 16> foo;
284 // foo->push_back(22); // we have overloaded operator->
285 // foo[0] = 10; // as well as operator[]
286 template <typename T, size_t stack_capacity>
288 : public StackContainer<std::vector<T, StackAllocator<T, stack_capacity> >,
289 stack_capacity> {
290 public:
292 : StackContainer<std::vector<T, StackAllocator<T, stack_capacity> >,
293 stack_capacity>() {}
294
295 // We need to put this in STL containers sometimes, which requires a copy
296 // constructor. We can't call the regular copy constructor because that will
297 // take the stack buffer from the original. Here, we create an empty object
298 // and make a stack buffer of its own.
300 : StackContainer<std::vector<T, StackAllocator<T, stack_capacity> >,
301 stack_capacity>() {
302 this->container().assign(other->begin(), other->end());
303 }
304
306 const StackVector<T, stack_capacity>& other) {
307 this->container().assign(other->begin(), other->end());
308 return *this;
309 }
310
311 // Vectors are commonly indexed, which isn't very convenient even with
312 // operator-> (using "->at()" does exception stuff we don't want).
313 T& operator[](size_t i) { return this->container().operator[](i); }
314 const T& operator[](size_t i) const {
315 return this->container().operator[](i);
316 }
317 };
318
319 // ----------------------------------------------------------------------------
320
321 template <typename T = float>
322 class real3 {
323 public:
324 real3() {}
325 real3(T x) {
326 v[0] = x;
327 v[1] = x;
328 v[2] = x;
329 }
330 real3(T xx, T yy, T zz) {
331 v[0] = xx;
332 v[1] = yy;
333 v[2] = zz;
334 }
335 explicit real3(const T* p) {
336 v[0] = p[0];
337 v[1] = p[1];
338 v[2] = p[2];
339 }
340
341 inline T x() const { return v[0]; }
342 inline T y() const { return v[1]; }
343 inline T z() const { return v[2]; }
344
345 real3 operator*(T f) const { return real3(x() * f, y() * f, z() * f); }
346 real3 operator-(const real3& f2) const {
347 return real3(x() - f2.x(), y() - f2.y(), z() - f2.z());
348 }
349 real3 operator*(const real3& f2) const {
350 return real3(x() * f2.x(), y() * f2.y(), z() * f2.z());
351 }
352 real3 operator+(const real3& f2) const {
353 return real3(x() + f2.x(), y() + f2.y(), z() + f2.z());
354 }
355 real3& operator+=(const real3& f2) {
356 v[0] += f2.x();
357 v[1] += f2.y();
358 v[2] += f2.z();
359 return (*this);
360 }
361 real3 operator/(const real3& f2) const {
362 return real3(x() / f2.x(), y() / f2.y(), z() / f2.z());
363 }
364 real3 operator-() const { return real3(-x(), -y(), -z()); }
365 T operator[](int i) const { return v[i]; }
366 T& operator[](int i) { return v[i]; }
367
368 T v[3];
369 // T pad; // for alignment (when T = float)
370 };
371
372 template <typename T>
373 inline real3<T> operator*(T f, const real3<T>& v) {
374 return real3<T>(v.x() * f, v.y() * f, v.z() * f);
375 }
376
377 template <typename T>
378 inline real3<T> vneg(const real3<T>& rhs) {
379 return real3<T>(-rhs.x(), -rhs.y(), -rhs.z());
380 }
381
382 template <typename T>
383 inline T vlength(const real3<T>& rhs) {
384 return std::sqrt(rhs.x() * rhs.x() + rhs.y() * rhs.y() + rhs.z() * rhs.z());
385 }
386
387 template <typename T>
388 inline real3<T> vnormalize(const real3<T>& rhs) {
389 real3<T> v = rhs;
390 T len = vlength(rhs);
391 if (std::fabs(len) > std::numeric_limits<T>::epsilon()) {
392 T inv_len = static_cast<T>(1.0) / len;
393 v.v[0] *= inv_len;
394 v.v[1] *= inv_len;
395 v.v[2] *= inv_len;
396 }
397 return v;
398 }
399
400 template <typename T>
401 inline real3<T> vcross(const real3<T> a, const real3<T> b) {
402 real3<T> c;
403 c[0] = a[1] * b[2] - a[2] * b[1];
404 c[1] = a[2] * b[0] - a[0] * b[2];
405 c[2] = a[0] * b[1] - a[1] * b[0];
406 return c;
407 }
408
409 template <typename T>
410 inline T vdot(const real3<T> a, const real3<T> b) {
411 return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
412 }
413
414 template <typename T>
416 real3<T> r;
417
418#ifdef NANORT_USE_CPP11_FEATURE
419
420 if (std::fabs(v[0]) < std::numeric_limits<T>::epsilon()) {
421 r[0] = std::numeric_limits<T>::infinity() *
422 std::copysign(static_cast<T>(1), v[0]);
423 }
424 else {
425 r[0] = static_cast<T>(1.0) / v[0];
426 }
427
428 if (std::fabs(v[1]) < std::numeric_limits<T>::epsilon()) {
429 r[1] = std::numeric_limits<T>::infinity() *
430 std::copysign(static_cast<T>(1), v[1]);
431 }
432 else {
433 r[1] = static_cast<T>(1.0) / v[1];
434 }
435
436 if (std::fabs(v[2]) < std::numeric_limits<T>::epsilon()) {
437 r[2] = std::numeric_limits<T>::infinity() *
438 std::copysign(static_cast<T>(1), v[2]);
439 }
440 else {
441 r[2] = static_cast<T>(1.0) / v[2];
442 }
443#else
444
445 if (std::fabs(v[0]) < std::numeric_limits<T>::epsilon()) {
446 T sgn = (v[0] < static_cast<T>(0)) ? static_cast<T>(-1) : static_cast<T>(1);
447 r[0] = std::numeric_limits<T>::infinity() * sgn;
448 }
449 else {
450 r[0] = static_cast<T>(1.0) / v[0];
451 }
452
453 if (std::fabs(v[1]) < std::numeric_limits<T>::epsilon()) {
454 T sgn = (v[1] < static_cast<T>(0)) ? static_cast<T>(-1) : static_cast<T>(1);
455 r[1] = std::numeric_limits<T>::infinity() * sgn;
456 }
457 else {
458 r[1] = static_cast<T>(1.0) / v[1];
459 }
460
461 if (std::fabs(v[2]) < std::numeric_limits<T>::epsilon()) {
462 T sgn = (v[2] < static_cast<T>(0)) ? static_cast<T>(-1) : static_cast<T>(1);
463 r[2] = std::numeric_limits<T>::infinity() * sgn;
464 }
465 else {
466 r[2] = static_cast<T>(1.0) / v[2];
467 }
468#endif
469
470 return r;
471 }
472
473 template <typename real>
474 inline const real* get_vertex_addr(const real* p, const size_t idx,
475 const size_t stride_bytes) {
476 return reinterpret_cast<const real*>(
477 reinterpret_cast<const unsigned char*>(p) + idx * stride_bytes);
478 }
479
480 template <typename T = float>
481 class Ray {
482 public:
484 : min_t(static_cast<T>(0.0)),
485 max_t( std::numeric_limits<T>::max()), // this is modified from original as it wraps numeric_limits in paranthesis to avoid windows macro
487 org[0] = static_cast<T>(0.0);
488 org[1] = static_cast<T>(0.0);
489 org[2] = static_cast<T>(0.0);
490 dir[0] = static_cast<T>(0.0);
491 dir[1] = static_cast<T>(0.0);
492 dir[2] = static_cast<T>(-1.0);
493 }
494
495 T org[3]; // must set
496 T dir[3]; // must set
497 T min_t; // minimum ray hit distance.
498 T max_t; // maximum ray hit distance.
499 unsigned int type; // ray type
500
501 // TODO(LTE): Align sizeof(Ray)
502 };
503
504 template <typename T = float>
505 class BVHNode {
506 public:
508 BVHNode(const BVHNode& rhs) {
509 bmin[0] = rhs.bmin[0];
510 bmin[1] = rhs.bmin[1];
511 bmin[2] = rhs.bmin[2];
512 flag = rhs.flag;
513
514 bmax[0] = rhs.bmax[0];
515 bmax[1] = rhs.bmax[1];
516 bmax[2] = rhs.bmax[2];
517 axis = rhs.axis;
518
519 data[0] = rhs.data[0];
520 data[1] = rhs.data[1];
521 }
522
523 BVHNode& operator=(const BVHNode& rhs) {
524 bmin[0] = rhs.bmin[0];
525 bmin[1] = rhs.bmin[1];
526 bmin[2] = rhs.bmin[2];
527 flag = rhs.flag;
528
529 bmax[0] = rhs.bmax[0];
530 bmax[1] = rhs.bmax[1];
531 bmax[2] = rhs.bmax[2];
532 axis = rhs.axis;
533
534 data[0] = rhs.data[0];
535 data[1] = rhs.data[1];
536
537 return (*this);
538 }
539
541
542 T bmin[3];
543 T bmax[3];
544
545 int flag; // 1 = leaf node, 0 = branch node
546 int axis;
547
548 // leaf
549 // data[0] = npoints
550 // data[1] = index
551 //
552 // branch
553 // data[0] = child[0]
554 // data[1] = child[1]
555 unsigned int data[2];
556 };
557
558 template <class H>
560 public:
561 bool operator()(const H& a, const H& b) const { return a.t < b.t; }
562 };
563
565 template <typename T = float>
569 unsigned int max_tree_depth;
570 unsigned int bin_size;
571 unsigned int shallow_depth;
573
574 // Cache bounding box computation.
575 // Requires more memory, but BVHbuild can be faster.
577 unsigned char pad[3];
578
579 // Set default value: Taabb = 0.2
581 : cost_t_aabb(static_cast<T>(0.2)),
583 max_tree_depth(256),
584 bin_size(64),
588 cache_bbox(false) {}
589 };
590
593 public:
594 unsigned int max_tree_depth;
595 unsigned int num_leaf_nodes;
596 unsigned int num_branch_nodes;
598
599 // Set default value: Taabb = 0.2
601 : max_tree_depth(0),
604 build_secs(0.0f) {}
605 };
606
611 public:
612 // Hit only for face IDs in indexRange.
613 // This feature is good to mimic something like glDrawArrays()
614 unsigned int prim_ids_range[2];
615
616 // Prim ID to skip for avoiding self-intersection
617 // -1 = no skipping
618 unsigned int skip_prim_id;
619
621 unsigned char pad[3];
622
624 prim_ids_range[0] = 0;
625 prim_ids_range[1] = 0x7FFFFFFF; // Up to 2G face IDs.
626
627 skip_prim_id = static_cast<unsigned int>(-1);
628 cull_back_face = false;
629 }
630 };
631
635 template <typename T>
636 class BBox {
637 public:
640
642 bmin[0] = bmin[1] = bmin[2] = std::numeric_limits<T>::max();
643 bmax[0] = bmax[1] = bmax[2] = -std::numeric_limits<T>::max();
644 }
645 };
646
653 template <typename T>
654 class NodeHit {
655 public:
657 : t_min(std::numeric_limits<T>::max()),
658 t_max(-std::numeric_limits<T>::max()),
659 node_id(static_cast<unsigned int>(-1)) {}
660
661 NodeHit(const NodeHit<T>& rhs) {
662 t_min = rhs.t_min;
663 t_max = rhs.t_max;
664 node_id = rhs.node_id;
665 }
666
668 t_min = rhs.t_min;
669 t_max = rhs.t_max;
670 node_id = rhs.node_id;
671
672 return (*this);
673 }
674
676
679 unsigned int node_id;
680 };
681
687 template <typename T>
689 public:
690 inline bool operator()(const NodeHit<T>& a, const NodeHit<T>& b) {
691 return a.t_min < b.t_min;
692 }
693 };
694
704 template <typename T>
705 class BVHAccel {
706 public:
707 BVHAccel() : pad0_(0) { (void)pad0_; }
709
722 template <class Prim, class Pred>
723 bool Build(const unsigned int num_primitives, const Prim& p, const Pred& pred,
724 const BVHBuildOptions<T>& options = BVHBuildOptions<T>());
725
732
733#if defined(NANORT_ENABLE_SERIALIZATION)
737 bool Dump(const char* filename) const;
738 bool Dump(FILE* fp) const;
739
743 bool Load(const char* filename);
744 bool Load(FILE* fp);
745#endif
746
747 void Debug();
748
763 template <class I, class H>
764 bool Traverse(const Ray<T>& ray, const I& intersector, H* isect,
765 const BVHTraceOptions& options = BVHTraceOptions()) const;
766
767#if 0
770 template<class I, class H, class Comp>
771 bool MultiHitTraverse(const Ray<T>& ray,
772 int max_intersections,
773 const I& intersector,
774 StackVector<H, 128>* isects,
775 const BVHTraceOptions& options = BVHTraceOptions()) const;
776#endif
777
787 template <class I>
788 bool ListNodeIntersections(const Ray<T>& ray, int max_intersections,
789 const I& intersector,
790 StackVector<NodeHit<T>, 128>* hits) const;
791
792 const std::vector<BVHNode<T> >& GetNodes() const { return nodes_; }
793 const std::vector<unsigned int>& GetIndices() const { return indices_; }
794
798 void BoundingBox(T bmin[3], T bmax[3]) const {
799 if (nodes_.empty()) {
800 bmin[0] = bmin[1] = bmin[2] = std::numeric_limits<T>::max();
801 bmax[0] = bmax[1] = bmax[2] = -std::numeric_limits<T>::max();
802 }
803 else {
804 bmin[0] = nodes_[0].bmin[0];
805 bmin[1] = nodes_[0].bmin[1];
806 bmin[2] = nodes_[0].bmin[2];
807 bmax[0] = nodes_[0].bmax[0];
808 bmax[1] = nodes_[0].bmax[1];
809 bmax[2] = nodes_[0].bmax[2];
810 }
811 }
812
813 bool IsValid() const { return nodes_.size() > 0; }
814
815 private:
816#if defined(NANORT_ENABLE_PARALLEL_BUILD)
817 typedef struct {
818 unsigned int left_idx;
819 unsigned int right_idx;
820 unsigned int offset;
821 } ShallowNodeInfo;
822
823 // Used only during BVH construction
824 std::vector<ShallowNodeInfo> shallow_node_infos_;
825
827 template <class P, class Pred>
828 unsigned int BuildShallowTree(std::vector<BVHNode<T> >* out_nodes,
829 unsigned int left_idx, unsigned int right_idx,
830 unsigned int depth,
831 unsigned int max_shallow_depth, const P& p,
832 const Pred& pred);
833#endif
834
836 template <class P, class Pred>
837 unsigned int BuildTree(BVHBuildStatistics* out_stat,
838 std::vector<BVHNode<T> >* out_nodes,
839 unsigned int left_idx, unsigned int right_idx,
840 unsigned int depth, const P& p, const Pred& pred);
841
842 template <class I>
843 bool TestLeafNode(const BVHNode<T>& node, const Ray<T>& ray,
844 const I& intersector) const;
845
846 template <class I>
848 const BVHNode<T>& node, const Ray<T>& ray, const int max_intersections,
849 const I& intersector,
850 std::priority_queue<NodeHit<T>, std::vector<NodeHit<T> >,
851 NodeHitComparator<T> >* isect_pq) const;
852
853#if 0
854 template<class I, class H, class Comp>
855 bool MultiHitTestLeafNode(std::priority_queue<H, std::vector<H>, Comp>* isect_pq,
856 int max_intersections,
857 const BVHNode<T>& node, const Ray<T>& ray,
858 const I& intersector) const;
859#endif
860
861 std::vector<BVHNode<T> > nodes_;
862 std::vector<unsigned int> indices_; // max 4G triangles.
863 std::vector<BBox<T> > bboxes_;
866 unsigned int pad0_;
867 };
868
869 // Predefined SAH predicator for triangle.
870 template <typename T = float>
872 public:
874 const T* vertices, const unsigned int* faces,
875 size_t vertex_stride_bytes) // e.g. 12 for sizeof(float) * XYZ
876 : axis_(0),
877 pos_(static_cast<T>(0.0)),
878 vertices_(vertices),
879 faces_(faces),
880 vertex_stride_bytes_(vertex_stride_bytes) {}
881
883 : axis_(rhs.axis_),
884 pos_(rhs.pos_),
885 vertices_(rhs.vertices_),
886 faces_(rhs.faces_),
888
890 axis_ = rhs.axis_;
891 pos_ = rhs.pos_;
892 vertices_ = rhs.vertices_;
893 faces_ = rhs.faces_;
895
896 return (*this);
897 }
898
899 void Set(int axis, T pos) const {
900 axis_ = axis;
901 pos_ = pos;
902 }
903
904 bool operator()(unsigned int i) const {
905 int axis = axis_;
906 T pos = pos_;
907
908 unsigned int i0 = faces_[3 * i + 0];
909 unsigned int i1 = faces_[3 * i + 1];
910 unsigned int i2 = faces_[3 * i + 2];
911
912 real3<T> p0(get_vertex_addr<T>(vertices_, i0, vertex_stride_bytes_));
913 real3<T> p1(get_vertex_addr<T>(vertices_, i1, vertex_stride_bytes_));
914 real3<T> p2(get_vertex_addr<T>(vertices_, i2, vertex_stride_bytes_));
915
916 T center = p0[axis] + p1[axis] + p2[axis];
917
918 return (center < pos* static_cast<T>(3.0));
919 }
920
921 private:
922 mutable int axis_;
923 mutable T pos_;
924 const T* vertices_;
925 const unsigned int* faces_;
927 };
928
929 // Predefined Triangle mesh geometry.
930 template <typename T = float>
932 public:
934 const T* vertices, const unsigned int* faces,
935 const size_t vertex_stride_bytes) // e.g. 12 for sizeof(float) * XYZ
936 : vertices_(vertices),
937 faces_(faces),
938 vertex_stride_bytes_(vertex_stride_bytes) {}
939
942 void BoundingBox(real3<T>* bmin, real3<T>* bmax,
943 unsigned int prim_index) const {
944 unsigned vertex = faces_[3 * prim_index + 0];
945
946 (*bmin)[0] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[0];
947 (*bmin)[1] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[1];
948 (*bmin)[2] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[2];
949 (*bmax)[0] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[0];
950 (*bmax)[1] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[1];
951 (*bmax)[2] = get_vertex_addr(vertices_, vertex, vertex_stride_bytes_)[2];
952
953 // remaining two vertices of the primitive
954 for (unsigned int i = 1; i < 3; i++) {
955 // xyz
956 for (int k = 0; k < 3; k++) {
957 T coord = get_vertex_addr<T>(vertices_, faces_[3 * prim_index + i],
959
960 (*bmin)[k] = std::min((*bmin)[k], coord);
961 (*bmax)[k] = std::max((*bmax)[k], coord);
962 }
963 }
964 }
965
966 const T* vertices_;
967 const unsigned int* faces_;
969 };
970
971 template <typename T = float>
973 public:
974 T u;
975 T v;
976
977 // Required member variables.
978 T t;
979 unsigned int prim_id;
980 };
981
982 template <typename T = float, class H = TriangleIntersection<T> >
984 public:
985 TriangleIntersector(const T* vertices, const unsigned int* faces,
986 const size_t vertex_stride_bytes) // e.g.
987 // vertex_stride_bytes
988 // = 12 = sizeof(float)
989 // * 3
990 : vertices_(vertices),
991 faces_(faces),
992 vertex_stride_bytes_(vertex_stride_bytes) {}
993
994 // For Watertight Ray/Triangle Intersection.
995 typedef struct {
996 T Sx;
997 T Sy;
998 T Sz;
999 int kx;
1000 int ky;
1001 int kz;
1002 } RayCoeff;
1003
1007 bool Intersect(T* t_inout, const unsigned int prim_index) const {
1008 if ((prim_index < trace_options_.prim_ids_range[0]) ||
1009 (prim_index >= trace_options_.prim_ids_range[1])) {
1010 return false;
1011 }
1012
1013 // Self-intersection test.
1014 if (prim_index == trace_options_.skip_prim_id) {
1015 return false;
1016 }
1017
1018 const unsigned int f0 = faces_[3 * prim_index + 0];
1019 const unsigned int f1 = faces_[3 * prim_index + 1];
1020 const unsigned int f2 = faces_[3 * prim_index + 2];
1021
1025
1026 const real3<T> A = p0 - ray_org_;
1027 const real3<T> B = p1 - ray_org_;
1028 const real3<T> C = p2 - ray_org_;
1029
1030 const T Ax = A[ray_coeff_.kx] - ray_coeff_.Sx * A[ray_coeff_.kz];
1031 const T Ay = A[ray_coeff_.ky] - ray_coeff_.Sy * A[ray_coeff_.kz];
1032 const T Bx = B[ray_coeff_.kx] - ray_coeff_.Sx * B[ray_coeff_.kz];
1033 const T By = B[ray_coeff_.ky] - ray_coeff_.Sy * B[ray_coeff_.kz];
1034 const T Cx = C[ray_coeff_.kx] - ray_coeff_.Sx * C[ray_coeff_.kz];
1035 const T Cy = C[ray_coeff_.ky] - ray_coeff_.Sy * C[ray_coeff_.kz];
1036
1037 T U = Cx * By - Cy * Bx;
1038 T V = Ax * Cy - Ay * Cx;
1039 T W = Bx * Ay - By * Ax;
1040
1041#ifdef __clang__
1042#pragma clang diagnostic push
1043#pragma clang diagnostic ignored "-Wfloat-equal"
1044#endif
1045
1046 // Fall back to test against edges using double precision.
1047 if (U == static_cast<T>(0.0) || V == static_cast<T>(0.0) ||
1048 W == static_cast<T>(0.0)) {
1049 double CxBy = static_cast<double>(Cx) * static_cast<double>(By);
1050 double CyBx = static_cast<double>(Cy) * static_cast<double>(Bx);
1051 U = static_cast<T>(CxBy - CyBx);
1052
1053 double AxCy = static_cast<double>(Ax) * static_cast<double>(Cy);
1054 double AyCx = static_cast<double>(Ay) * static_cast<double>(Cx);
1055 V = static_cast<T>(AxCy - AyCx);
1056
1057 double BxAy = static_cast<double>(Bx) * static_cast<double>(Ay);
1058 double ByAx = static_cast<double>(By) * static_cast<double>(Ax);
1059 W = static_cast<T>(BxAy - ByAx);
1060 }
1061
1062 if (U < static_cast<T>(0.0) || V < static_cast<T>(0.0) ||
1063 W < static_cast<T>(0.0)) {
1065 (U > static_cast<T>(0.0) || V > static_cast<T>(0.0) ||
1066 W > static_cast<T>(0.0))) {
1067 return false;
1068 }
1069 }
1070
1071 T det = U + V + W;
1072 if (det == static_cast<T>(0.0)) return false;
1073
1074#ifdef __clang__
1075#pragma clang diagnostic pop
1076#endif
1077
1078 const T Az = ray_coeff_.Sz * A[ray_coeff_.kz];
1079 const T Bz = ray_coeff_.Sz * B[ray_coeff_.kz];
1080 const T Cz = ray_coeff_.Sz * C[ray_coeff_.kz];
1081 const T D = U * Az + V * Bz + W * Cz;
1082
1083 const T rcpDet = static_cast<T>(1.0) / det;
1084 T tt = D * rcpDet;
1085
1086 if (tt > (*t_inout)) {
1087 return false;
1088 }
1089
1090 if (tt < t_min_) {
1091 return false;
1092 }
1093
1094 (*t_inout) = tt;
1095 // Use Möller-Trumbore style barycentric coordinates
1096 // U + V + W = 1.0 and interp(p) = U * p0 + V * p1 + W * p2
1097 // We want interp(p) = (1 - u - v) * p0 + u * v1 + v * p2;
1098 // => u = V, v = W.
1099 u_ = V * rcpDet;
1100 v_ = W * rcpDet;
1101
1102 return true;
1103 }
1104
1106 T GetT() const { return t_; }
1107
1109 void Update(T t, unsigned int prim_idx) const {
1110 t_ = t;
1111 prim_id_ = prim_idx;
1112 }
1113
1116 void PrepareTraversal(const Ray<T>& ray,
1117 const BVHTraceOptions& trace_options) const {
1118 ray_org_[0] = ray.org[0];
1119 ray_org_[1] = ray.org[1];
1120 ray_org_[2] = ray.org[2];
1121
1122 // Calculate dimension where the ray direction is maximal.
1123 ray_coeff_.kz = 0;
1124 T absDir = std::fabs(ray.dir[0]);
1125 if (absDir < std::fabs(ray.dir[1])) {
1126 ray_coeff_.kz = 1;
1127 absDir = std::fabs(ray.dir[1]);
1128 }
1129 if (absDir < std::fabs(ray.dir[2])) {
1130 ray_coeff_.kz = 2;
1131 absDir = std::fabs(ray.dir[2]);
1132 }
1133
1134 ray_coeff_.kx = ray_coeff_.kz + 1;
1135 if (ray_coeff_.kx == 3) ray_coeff_.kx = 0;
1136 ray_coeff_.ky = ray_coeff_.kx + 1;
1137 if (ray_coeff_.ky == 3) ray_coeff_.ky = 0;
1138
1139 // Swap kx and ky dimension to preserve winding direction of triangles.
1140 if (ray.dir[ray_coeff_.kz] < static_cast<T>(0.0))
1141 std::swap(ray_coeff_.kx, ray_coeff_.ky);
1142
1143 // Calculate shear constants.
1144 ray_coeff_.Sx = ray.dir[ray_coeff_.kx] / ray.dir[ray_coeff_.kz];
1145 ray_coeff_.Sy = ray.dir[ray_coeff_.ky] / ray.dir[ray_coeff_.kz];
1146 ray_coeff_.Sz = static_cast<T>(1.0) / ray.dir[ray_coeff_.kz];
1147
1148 trace_options_ = trace_options;
1149
1150 t_min_ = ray.min_t;
1151
1152 u_ = static_cast<T>(0.0);
1153 v_ = static_cast<T>(0.0);
1154 }
1155
1158 void PostTraversal(const Ray<T>& ray, bool hit, H* isect) const {
1159 if (hit && isect) {
1160 (*isect).t = t_;
1161 (*isect).u = u_;
1162 (*isect).v = v_;
1163 (*isect).prim_id = prim_id_;
1164 }
1165 (void)ray;
1166 }
1167
1168 private:
1169 const T* vertices_;
1170 const unsigned int* faces_;
1172
1176 mutable T t_min_;
1177
1178 mutable T t_;
1179 mutable T u_;
1180 mutable T v_;
1181 mutable unsigned int prim_id_;
1182 };
1183
1184 //
1185 // Robust BVH Ray Traversal : http://jcgt.org/published/0002/02/02/paper.pdf
1186 //
1187
1188 // NaN-safe min and max function.
1189 template <class T>
1190 const T& safemin(const T& a, const T& b) {
1191 return (a < b) ? a : b;
1192 }
1193 template <class T>
1194 const T& safemax(const T& a, const T& b) {
1195 return (a > b) ? a : b;
1196 }
1197
1198 //
1199 // SAH functions
1200 //
1201 struct BinBuffer {
1202 explicit BinBuffer(unsigned int size) {
1203 bin_size = size;
1204 bin.resize(2 * 3 * size);
1205 clear();
1206 }
1207
1208 void clear() { memset(&bin[0], 0, sizeof(size_t) * 2 * 3 * bin_size); }
1209
1210 std::vector<size_t> bin; // (min, max) * xyz * binsize
1211 unsigned int bin_size;
1212 unsigned int pad0;
1213 };
1214
1215 template <typename T>
1216 inline T CalculateSurfaceArea(const real3<T>& min, const real3<T>& max) {
1217 real3<T> box = max - min;
1218 return static_cast<T>(2.0) *
1219 (box[0] * box[1] + box[1] * box[2] + box[2] * box[0]);
1220 }
1221
1222 template <typename T>
1224 const T* vertices,
1225 const unsigned int* faces,
1226 unsigned int index) {
1227 unsigned int f0 = faces[3 * index + 0];
1228 unsigned int f1 = faces[3 * index + 1];
1229 unsigned int f2 = faces[3 * index + 2];
1230
1231 real3<T> p[3];
1232
1233 p[0] = real3<T>(&vertices[3 * f0]);
1234 p[1] = real3<T>(&vertices[3 * f1]);
1235 p[2] = real3<T>(&vertices[3 * f2]);
1236
1237 (*bmin) = p[0];
1238 (*bmax) = p[0];
1239
1240 for (int i = 1; i < 3; i++) {
1241 (*bmin)[0] = std::min((*bmin)[0], p[i][0]);
1242 (*bmin)[1] = std::min((*bmin)[1], p[i][1]);
1243 (*bmin)[2] = std::min((*bmin)[2], p[i][2]);
1244
1245 (*bmax)[0] = std::max((*bmax)[0], p[i][0]);
1246 (*bmax)[1] = std::max((*bmax)[1], p[i][1]);
1247 (*bmax)[2] = std::max((*bmax)[2], p[i][2]);
1248 }
1249 }
1250
1251 template <typename T, class P>
1252 inline void ContributeBinBuffer(BinBuffer* bins, // [out]
1253 const real3<T>& scene_min,
1254 const real3<T>& scene_max,
1255 unsigned int* indices, unsigned int left_idx,
1256 unsigned int right_idx, const P& p) {
1257 T bin_size = static_cast<T>(bins->bin_size);
1258
1259 // Calculate extent
1260 real3<T> scene_size, scene_inv_size;
1261 scene_size = scene_max - scene_min;
1262
1263 for (int i = 0; i < 3; ++i) {
1264 assert(scene_size[i] >= static_cast<T>(0.0));
1265
1266 if (scene_size[i] > static_cast<T>(0.0)) {
1267 scene_inv_size[i] = bin_size / scene_size[i];
1268 }
1269 else {
1270 scene_inv_size[i] = static_cast<T>(0.0);
1271 }
1272 }
1273
1274 // Clear bin data
1275 std::fill(bins->bin.begin(), bins->bin.end(), 0);
1276 // memset(&bins->bin[0], 0, sizeof(2 * 3 * bins->bin_size));
1277
1278 size_t idx_bmin[3];
1279 size_t idx_bmax[3];
1280
1281 for (size_t i = left_idx; i < right_idx; i++) {
1282 //
1283 // Quantize the position into [0, BIN_SIZE)
1284 //
1285 // q[i] = (int)(p[i] - scene_bmin) / scene_size
1286 //
1287 real3<T> bmin;
1288 real3<T> bmax;
1289
1290 p.BoundingBox(&bmin, &bmax, indices[i]);
1291 // GetBoundingBoxOfTriangle(&bmin, &bmax, vertices, faces, indices[i]);
1292
1293 real3<T> quantized_bmin = (bmin - scene_min) * scene_inv_size;
1294 real3<T> quantized_bmax = (bmax - scene_min) * scene_inv_size;
1295
1296 // idx is now in [0, BIN_SIZE)
1297 for (int j = 0; j < 3; ++j) {
1298 int q0 = static_cast<int>(quantized_bmin[j]);
1299 if (q0 < 0) q0 = 0;
1300 int q1 = static_cast<int>(quantized_bmax[j]);
1301 if (q1 < 0) q1 = 0;
1302
1303 idx_bmin[j] = static_cast<unsigned int>(q0);
1304 idx_bmax[j] = static_cast<unsigned int>(q1);
1305
1306 if (idx_bmin[j] >= bin_size)
1307 idx_bmin[j] = static_cast<unsigned int>(bin_size) - 1;
1308
1309 if (idx_bmax[j] >= bin_size)
1310 idx_bmax[j] = static_cast<unsigned int>(bin_size) - 1;
1311
1312 // Increment bin counter
1313 bins->bin[0 * (bins->bin_size * 3) +
1314 static_cast<size_t>(j) * bins->bin_size + idx_bmin[j]] += 1;
1315 bins->bin[1 * (bins->bin_size * 3) +
1316 static_cast<size_t>(j) * bins->bin_size + idx_bmax[j]] += 1;
1317 }
1318 }
1319 }
1320
// Surface Area Heuristic cost of a candidate split:
//   2 * Taabb + (SA(L) / SA(parent)) * |L| * Ttri
//             + (SA(R) / SA(parent)) * |R| * Ttri
// where Taabb is the AABB traversal cost, Ttri the primitive intersection
// cost and invS the reciprocal of the parent's surface area.
template <typename T>
inline T SAH(size_t ns1, T leftArea, size_t ns2, T rightArea, T invS, T Taabb,
             T Ttri) {
  const T traversal_cost = static_cast<T>(2.0) * Taabb;
  const T left_cost = (leftArea * invS) * static_cast<T>(ns1) * Ttri;
  const T right_cost = (rightArea * invS) * static_cast<T>(ns2) * Ttri;
  return traversal_cost + left_cost + right_cost;
}
1332
// Sweep the SAH bin histograms and compute, for every axis, the interior
// split plane with the lowest SAH cost; then select the overall cheapest
// axis. cut_pos[0..2] receives the best position per axis and *minCostAxis
// the winning axis. Always returns true.
// costTaabb is the relative AABB traversal cost; the triangle cost is
// derived as (1 - costTaabb), hence the [0, 1] expectation below.
template <typename T>
inline bool FindCutFromBinBuffer(T* cut_pos,  // [out] xyz
                                 int* minCostAxis,  // [out]
                                 const BinBuffer* bins, const real3<T>& bmin,
                                 const real3<T>& bmax, size_t num_primitives,
                                 T costTaabb) {  // should be in [0.0, 1.0]
  const T kEPS = std::numeric_limits<T>::epsilon();  // * epsScale;

  size_t left, right;
  real3<T> bsize, bstep;
  real3<T> bminLeft, bmaxLeft;
  real3<T> bminRight, bmaxRight;
  T saLeft, saRight, saTotal;
  T pos;
  T minCost[3];

  T costTtri = static_cast<T>(1.0) - costTaabb;

  (*minCostAxis) = 0;

  bsize = bmax - bmin;
  bstep = bsize * (static_cast<T>(1.0) / bins->bin_size);
  saTotal = CalculateSurfaceArea(bmin, bmax);

  // Guard against a degenerate (near-zero-area) parent box before dividing.
  T invSaTotal = static_cast<T>(0.0);
  if (saTotal > kEPS) {
    invSaTotal = static_cast<T>(1.0) / saTotal;
  }

  for (int j = 0; j < 3; ++j) {
    //
    // Compute SAH cost for the right side of each cell of the bbox.
    // Exclude both extreme sides of the bbox.
    //
    //  i:      0    1    2    3
    //     +----+----+----+----+----+
    //     |    |    |    |    |    |
    //     +----+----+----+----+----+
    //

    // Default split position (first interior plane) in case no candidate
    // improves on the initial infinite cost below.
    T minCostPos = bmin[j] + static_cast<T>(1.0) * bstep[j];
    minCost[j] = std::numeric_limits<T>::max();

    left = 0;
    right = num_primitives;
    bminLeft = bminRight = bmin;
    bmaxLeft = bmaxRight = bmax;

    // Incremental sweep over interior planes: `left` accumulates bbox-min
    // counts (histogram 0), `right` sheds bbox-max counts (histogram 1).
    for (int i = 0; i < static_cast<int>(bins->bin_size) - 1; ++i) {
      left += bins->bin[0 * (3 * bins->bin_size) +
                        static_cast<size_t>(j) * bins->bin_size +
                        static_cast<size_t>(i)];
      right -= bins->bin[1 * (3 * bins->bin_size) +
                         static_cast<size_t>(j) * bins->bin_size +
                         static_cast<size_t>(i)];

      assert(left <= num_primitives);
      assert(right <= num_primitives);

      //
      // Split pos bmin + (i + 1) * (bsize / BIN_SIZE)
      // +1 for i since we want a position on right side of the cell.
      //

      pos = bmin[j] + (i + static_cast<T>(1.0)) * bstep[j];
      bmaxLeft[j] = pos;
      bminRight[j] = pos;

      saLeft = CalculateSurfaceArea(bminLeft, bmaxLeft);
      saRight = CalculateSurfaceArea(bminRight, bmaxRight);

      T cost =
          SAH(left, saLeft, right, saRight, invSaTotal, costTaabb, costTtri);

      if (cost < minCost[j]) {
        //
        // Update the min cost
        //
        minCost[j] = cost;
        minCostPos = pos;
        // minCostAxis = j;
      }
    }

    cut_pos[j] = minCostPos;
  }

  // cut_axis = minCostAxis;
  // cut_pos = minCostPos;

  // Find min cost axis
  T cost = minCost[0];
  (*minCostAxis) = 0;

  if (cost > minCost[1]) {
    (*minCostAxis) = 1;
    cost = minCost[1];
  }
  if (cost > minCost[2]) {
    (*minCostAxis) = 2;
    cost = minCost[2];
  }

  return true;
}
1438
#ifdef _OPENMP
// Compute the union AABB of primitives indices[left_index, right_index)
// using OpenMP. Each thread folds bounds into its firstprivate local copy
// and the per-thread results are merged under a critical section.
template <typename T, class P>
void ComputeBoundingBoxOMP(real3<T>* bmin, real3<T>* bmax,
                           const unsigned int* indices, unsigned int left_index,
                           unsigned int right_index, const P& p) {
  // Seed the shared result (and the locals below) with the first primitive.
  { p.BoundingBox(bmin, bmax, indices[left_index]); }

  T local_bmin[3] = { (*bmin)[0], (*bmin)[1], (*bmin)[2] };
  T local_bmax[3] = { (*bmax)[0], (*bmax)[1], (*bmax)[2] };

  unsigned int n = right_index - left_index;

#pragma omp parallel firstprivate(local_bmin, local_bmax) if (n > (1024 * 128))
  {
    // `omp for` (not `omp parallel for`): we are already inside a parallel
    // region, so the loop must be shared across the existing team instead of
    // spawning a nested region. Each thread folds into its private
    // local_bmin/local_bmax; the original wrote to the shared *bmin/*bmax
    // here, which was a data race and left the locals unused.
#pragma omp for
    // for each face
    for (int i = int(left_index); i < int(right_index); i++) {
      unsigned int idx = indices[i];

      real3<T> bbox_min, bbox_max;

      p.BoundingBox(&bbox_min, &bbox_max, idx);

      // xyz
      for (int k = 0; k < 3; k++) {
        local_bmin[k] = std::min(local_bmin[k], bbox_min[k]);
        local_bmax[k] = std::max(local_bmax[k], bbox_max[k]);
      }
    }

    // Merge this thread's partial bounds into the shared result.
#pragma omp critical
    {
      for (int k = 0; k < 3; k++) {
        (*bmin)[k] = std::min((*bmin)[k], local_bmin[k]);
        (*bmax)[k] = std::max((*bmax)[k], local_bmax[k]);
      }
    }
  }
}
#endif
1479
#ifdef NANORT_USE_CPP11_FEATURE
// Compute the union AABB of primitives indices[left_index, right_index)
// with std::thread. The range is split into near-equal chunks; each worker
// reduces into its own slot of local_bmins/local_bmaxs (no sharing, so no
// locking is needed) and the slots are merged after join.
template <typename T, class P>
inline void ComputeBoundingBoxThreaded(real3<T>* bmin, real3<T>* bmax,
                                       const unsigned int* indices,
                                       unsigned int left_index,
                                       unsigned int right_index, const P& p) {
  unsigned int n = right_index - left_index;

  size_t num_threads = std::min(
      size_t(kNANORT_MAX_THREADS),
      std::max(size_t(1), size_t(std::thread::hardware_concurrency())));

  if (n < num_threads) {
    num_threads = n;
  }

  // Guard the empty range: without this, n == 0 clamps num_threads to 0 and
  // `n / num_threads` below divides by zero. With one worker over an empty
  // span the result is the (+inf, -inf) "empty box".
  if (num_threads == 0) {
    num_threads = 1;
  }

  std::vector<std::thread> workers;

  size_t ndiv = n / num_threads;

  std::vector<T> local_bmins(3 * num_threads);  // 3 = xyz
  std::vector<T> local_bmaxs(3 * num_threads);  // 3 = xyz

  for (size_t t = 0; t < num_threads; t++) {
    workers.emplace_back(std::thread([&, t]() {
      // [si, ei) is this worker's slice; the last worker absorbs the
      // remainder of the division.
      size_t si = left_index + t * ndiv;
      size_t ei = (t == (num_threads - 1)) ? size_t(right_index) : std::min(left_index + (t + 1) * ndiv, size_t(right_index));

      // Start from the empty box so the min/max folds below are correct.
      local_bmins[3 * t + 0] = std::numeric_limits<T>::infinity();
      local_bmins[3 * t + 1] = std::numeric_limits<T>::infinity();
      local_bmins[3 * t + 2] = std::numeric_limits<T>::infinity();
      local_bmaxs[3 * t + 0] = -std::numeric_limits<T>::infinity();
      local_bmaxs[3 * t + 1] = -std::numeric_limits<T>::infinity();
      local_bmaxs[3 * t + 2] = -std::numeric_limits<T>::infinity();

      // for each face
      for (size_t i = si; i < ei; i++) {
        unsigned int idx = indices[i];

        real3<T> bbox_min, bbox_max;
        p.BoundingBox(&bbox_min, &bbox_max, idx);

        // xyz
        for (size_t k = 0; k < 3; k++) {
          local_bmins[3 * t + k] =
              std::min(local_bmins[3 * t + k], bbox_min[int(k)]);
          local_bmaxs[3 * t + k] =
              std::max(local_bmaxs[3 * t + k], bbox_max[int(k)]);
        }
      }
    }));
  }

  for (auto& t : workers) {
    t.join();
  }

  // Merge the per-thread boxes: copy slot 0, then fold in the rest.
  for (size_t k = 0; k < 3; k++) {
    (*bmin)[int(k)] = local_bmins[k];
    (*bmax)[int(k)] = local_bmaxs[k];
  }

  for (size_t t = 1; t < num_threads; t++) {
    for (size_t k = 0; k < 3; k++) {
      (*bmin)[int(k)] = std::min((*bmin)[int(k)], local_bmins[3 * t + k]);
      (*bmax)[int(k)] = std::max((*bmax)[int(k)], local_bmaxs[3 * t + k]);
    }
  }
}
#endif
1551
1552 template <typename T, class P>
1553 inline void ComputeBoundingBox(real3<T>* bmin, real3<T>* bmax,
1554 const unsigned int* indices,
1555 unsigned int left_index,
1556 unsigned int right_index, const P& p) {
1557 unsigned int idx = indices[left_index];
1558 p.BoundingBox(bmin, bmax, idx);
1559
1560 {
1561 // for each primitive
1562 for (unsigned int i = left_index + 1; i < right_index; i++) {
1563 idx = indices[i];
1564 real3<T> bbox_min, bbox_max;
1565 p.BoundingBox(&bbox_min, &bbox_max, idx);
1566
1567 // xyz
1568 for (int k = 0; k < 3; k++) {
1569 (*bmin)[k] = std::min((*bmin)[k], bbox_min[k]);
1570 (*bmax)[k] = std::max((*bmax)[k], bbox_max[k]);
1571 }
1572 }
1573 }
1574 }
1575
1576 template <typename T>
1577 inline void GetBoundingBox(real3<T>* bmin, real3<T>* bmax,
1578 const std::vector<BBox<T> >& bboxes,
1579 unsigned int* indices, unsigned int left_index,
1580 unsigned int right_index) {
1581 unsigned int i = left_index;
1582 unsigned int idx = indices[i];
1583
1584 (*bmin)[0] = bboxes[idx].bmin[0];
1585 (*bmin)[1] = bboxes[idx].bmin[1];
1586 (*bmin)[2] = bboxes[idx].bmin[2];
1587 (*bmax)[0] = bboxes[idx].bmax[0];
1588 (*bmax)[1] = bboxes[idx].bmax[1];
1589 (*bmax)[2] = bboxes[idx].bmax[2];
1590
1591 // for each face
1592 for (i = left_index + 1; i < right_index; i++) {
1593 idx = indices[i];
1594
1595 // xyz
1596 for (int k = 0; k < 3; k++) {
1597 (*bmin)[k] = std::min((*bmin)[k], bboxes[idx].bmin[k]);
1598 (*bmax)[k] = std::max((*bmax)[k], bboxes[idx].bmax[k]);
1599 }
1600 }
1601 }
1602
1603 //
1604 // --
1605 //
1606
#if defined(NANORT_ENABLE_PARALLEL_BUILD)
// Build only the top `max_shallow_depth` levels of the BVH over
// indices_[left_idx, right_idx). Subtrees below that depth are not expanded:
// a placeholder node is pushed and the range is recorded in
// shallow_node_infos_ so Build() can construct those subtrees in parallel
// afterwards. Returns the index of the node created for this range.
template <typename T>
template <class P, class Pred>
unsigned int BVHAccel<T>::BuildShallowTree(std::vector<BVHNode<T> >* out_nodes,
                                           unsigned int left_idx,
                                           unsigned int right_idx,
                                           unsigned int depth,
                                           unsigned int max_shallow_depth,
                                           const P& p, const Pred& pred) {
  assert(left_idx <= right_idx);

  // This node is appended at the current end of the node array.
  unsigned int offset = static_cast<unsigned int>(out_nodes->size());

  if (stats_.max_tree_depth < depth) {
    stats_.max_tree_depth = depth;
  }

  real3<T> bmin, bmax;

#if defined(NANORT_USE_CPP11_FEATURE) && defined(NANORT_ENABLE_PARALLEL_BUILD)
  ComputeBoundingBoxThreaded(&bmin, &bmax, &indices_.at(0), left_idx, right_idx,
                             p);
#else
  ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, p);
#endif

  unsigned int n = right_idx - left_idx;
  if ((n <= options_.min_leaf_primitives) ||
      (depth >= options_.max_tree_depth)) {
    // Create leaf node.
    BVHNode<T> leaf;

    leaf.bmin[0] = bmin[0];
    leaf.bmin[1] = bmin[1];
    leaf.bmin[2] = bmin[2];

    leaf.bmax[0] = bmax[0];
    leaf.bmax[1] = bmax[1];
    leaf.bmax[2] = bmax[2];

    assert(left_idx < std::numeric_limits<unsigned int>::max());

    leaf.flag = 1;  // leaf
    leaf.data[0] = n;         // primitive count
    leaf.data[1] = left_idx;  // first slot in indices_

    out_nodes->push_back(leaf);  // atomic update

    stats_.num_leaf_nodes++;

    return offset;
  }

  //
  // Create branch node.
  //
  if (depth >= max_shallow_depth) {
    // Delay to build tree
    // Record this subtree's primitive range; Build() constructs it in
    // parallel later and overwrites the dummy node pushed below.
    ShallowNodeInfo info;
    info.left_idx = left_idx;
    info.right_idx = right_idx;
    info.offset = offset;
    shallow_node_infos_.push_back(info);

    // Add dummy node.
    BVHNode<T> node;
    node.axis = -1;
    node.flag = -1;
    out_nodes->push_back(node);

    return offset;

  }
  else {
    //
    // TODO(LTE): multi-threaded SAH computation, or use simple object median or
    // spacial median for shallow tree to speeding up the parallel build.
    //

    //
    // Compute SAH and find best split axis and position
    //
    int min_cut_axis = 0;
    T cut_pos[3] = { 0.0, 0.0, 0.0 };

    BinBuffer bins(options_.bin_size);
    ContributeBinBuffer(&bins, bmin, bmax, &indices_.at(0), left_idx, right_idx,
                        p);
    FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax, n,
                         options_.cost_t_aabb);

    // Try all 3 axis until good cut position avaiable.
    unsigned int mid_idx = left_idx;
    int cut_axis = min_cut_axis;

    for (int axis_try = 0; axis_try < 3; axis_try++) {
      unsigned int* begin = &indices_[left_idx];
      unsigned int* end =
          &indices_[right_idx - 1] + 1;  // mimics end() iterator
      unsigned int* mid = 0;

      // try min_cut_axis first.
      cut_axis = (min_cut_axis + axis_try) % 3;

      pred.Set(cut_axis, cut_pos[cut_axis]);
      //
      // Split at (cut_axis, cut_pos)
      // indices_ will be modified.
      //
      mid = std::partition(begin, end, pred);

      mid_idx = left_idx + static_cast<unsigned int>((mid - begin));

      if ((mid_idx == left_idx) || (mid_idx == right_idx)) {
        // Can't split well.
        // Switch to object median (which may create unoptimized tree, but
        // stable)
        mid_idx = left_idx + (n >> 1);

        // Try another axis if there's an axis to try.

      }
      else {
        // Found good cut. exit loop.
        break;
      }
    }

    BVHNode<T> node;
    node.axis = cut_axis;
    node.flag = 0;  // 0 = branch

    // Push the branch node now; its child offsets and bounds are patched in
    // below, after both subtrees have been built.
    out_nodes->push_back(node);

    unsigned int left_child_index = 0;
    unsigned int right_child_index = 0;

    left_child_index = BuildShallowTree(out_nodes, left_idx, mid_idx, depth + 1,
                                        max_shallow_depth, p, pred);

    right_child_index = BuildShallowTree(out_nodes, mid_idx, right_idx,
                                         depth + 1, max_shallow_depth, p, pred);

    //std::cout << "shallow[" << offset << "] l and r = " << left_child_index << ", " << right_child_index << std::endl;
    (*out_nodes)[offset].data[0] = left_child_index;
    (*out_nodes)[offset].data[1] = right_child_index;

    (*out_nodes)[offset].bmin[0] = bmin[0];
    (*out_nodes)[offset].bmin[1] = bmin[1];
    (*out_nodes)[offset].bmin[2] = bmin[2];

    (*out_nodes)[offset].bmax[0] = bmax[0];
    (*out_nodes)[offset].bmax[1] = bmax[1];
    (*out_nodes)[offset].bmax[2] = bmax[2];
  }

  stats_.num_branch_nodes++;

  return offset;
}
#endif
1768
1769 template <typename T>
1770 template <class P, class Pred>
1772 std::vector<BVHNode<T> >* out_nodes,
1773 unsigned int left_idx,
1774 unsigned int right_idx, unsigned int depth,
1775 const P& p, const Pred& pred) {
1776 assert(left_idx <= right_idx);
1777
1778 unsigned int offset = static_cast<unsigned int>(out_nodes->size());
1779
1780 if (out_stat->max_tree_depth < depth) {
1781 out_stat->max_tree_depth = depth;
1782 }
1783
1784 real3<T> bmin, bmax;
1785 if (!bboxes_.empty()) {
1786 GetBoundingBox(&bmin, &bmax, bboxes_, &indices_.at(0), left_idx, right_idx);
1787 }
1788 else {
1789 ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), left_idx, right_idx, p);
1790 }
1791
1792 unsigned int n = right_idx - left_idx;
1793 if ((n <= options_.min_leaf_primitives) ||
1794 (depth >= options_.max_tree_depth)) {
1795 // Create leaf node.
1796 BVHNode<T> leaf;
1797
1798 leaf.bmin[0] = bmin[0];
1799 leaf.bmin[1] = bmin[1];
1800 leaf.bmin[2] = bmin[2];
1801
1802 leaf.bmax[0] = bmax[0];
1803 leaf.bmax[1] = bmax[1];
1804 leaf.bmax[2] = bmax[2];
1805
1806 assert(left_idx < std::numeric_limits<unsigned int>::max());
1807
1808 leaf.flag = 1; // leaf
1809 leaf.data[0] = n;
1810 leaf.data[1] = left_idx;
1811
1812 out_nodes->push_back(leaf); // atomic update
1813
1814 out_stat->num_leaf_nodes++;
1815
1816 return offset;
1817 }
1818
1819 //
1820 // Create branch node.
1821 //
1822
1823 //
1824 // Compute SAH and find best split axis and position
1825 //
1826 int min_cut_axis = 0;
1827 T cut_pos[3] = { 0.0, 0.0, 0.0 };
1828
1829 BinBuffer bins(options_.bin_size);
1830 ContributeBinBuffer(&bins, bmin, bmax, &indices_.at(0), left_idx, right_idx,
1831 p);
1832 FindCutFromBinBuffer(cut_pos, &min_cut_axis, &bins, bmin, bmax, n,
1833 options_.cost_t_aabb);
1834
1835 // Try all 3 axis until good cut position avaiable.
1836 unsigned int mid_idx = left_idx;
1837 int cut_axis = min_cut_axis;
1838
1839 for (int axis_try = 0; axis_try < 3; axis_try++) {
1840 unsigned int* begin = &indices_[left_idx];
1841 unsigned int* end = &indices_[right_idx - 1] + 1; // mimics end() iterator.
1842 unsigned int* mid = 0;
1843
1844 // try min_cut_axis first.
1845 cut_axis = (min_cut_axis + axis_try) % 3;
1846
1847 pred.Set(cut_axis, cut_pos[cut_axis]);
1848
1849 //
1850 // Split at (cut_axis, cut_pos)
1851 // indices_ will be modified.
1852 //
1853 mid = std::partition(begin, end, pred);
1854
1855 mid_idx = left_idx + static_cast<unsigned int>((mid - begin));
1856
1857 if ((mid_idx == left_idx) || (mid_idx == right_idx)) {
1858 // Can't split well.
1859 // Switch to object median(which may create unoptimized tree, but
1860 // stable)
1861 mid_idx = left_idx + (n >> 1);
1862
1863 // Try another axis to find better cut.
1864
1865 }
1866 else {
1867 // Found good cut. exit loop.
1868 break;
1869 }
1870 }
1871
1872 BVHNode<T> node;
1873 node.axis = cut_axis;
1874 node.flag = 0; // 0 = branch
1875
1876 out_nodes->push_back(node);
1877
1878 unsigned int left_child_index = 0;
1879 unsigned int right_child_index = 0;
1880
1881 left_child_index =
1882 BuildTree(out_stat, out_nodes, left_idx, mid_idx, depth + 1, p, pred);
1883
1884 right_child_index =
1885 BuildTree(out_stat, out_nodes, mid_idx, right_idx, depth + 1, p, pred);
1886
1887 {
1888 (*out_nodes)[offset].data[0] = left_child_index;
1889 (*out_nodes)[offset].data[1] = right_child_index;
1890
1891 (*out_nodes)[offset].bmin[0] = bmin[0];
1892 (*out_nodes)[offset].bmin[1] = bmin[1];
1893 (*out_nodes)[offset].bmin[2] = bmin[2];
1894
1895 (*out_nodes)[offset].bmax[0] = bmax[0];
1896 (*out_nodes)[offset].bmax[1] = bmax[1];
1897 (*out_nodes)[offset].bmax[2] = bmax[2];
1898 }
1899
1900 out_stat->num_branch_nodes++;
1901
1902 return offset;
1903 }
1904
// Build the BVH over `num_primitives` primitives.
// `p` supplies per-primitive bounding boxes (p.BoundingBox(...)) and `pred`
// is the SAH split predicate (pred.Set(axis, pos) then pred(prim_index)).
// indices_ is (re)initialized to the identity permutation and permuted in
// place by the tree build. Returns false when there is nothing to build.
template <typename T>
template <class Prim, class Pred>
bool BVHAccel<T>::Build(unsigned int num_primitives, const Prim& p,
                        const Pred& pred, const BVHBuildOptions<T>& options) {
  options_ = options;
  stats_ = BVHBuildStatistics();

  nodes_.clear();
  bboxes_.clear();
#if defined(NANORT_ENABLE_PARALLEL_BUILD)
  shallow_node_infos_.clear();
#endif

  assert(options_.bin_size > 1);

  if (num_primitives == 0) {
    return false;
  }

  unsigned int n = num_primitives;

  //
  // 1. Create triangle indices(this will be permutated in BuildTree)
  //
  indices_.resize(n);

#if defined(NANORT_USE_CPP11_FEATURE)
  {
    // Fill indices_[k] = k with a small thread pool; each worker writes a
    // disjoint [si, ei) slice, the last worker taking the remainder.
    size_t num_threads = std::min(
        size_t(kNANORT_MAX_THREADS),
        std::max(size_t(1), size_t(std::thread::hardware_concurrency())));

    if (n < num_threads) {
      num_threads = n;
    }

    std::vector<std::thread> workers;

    size_t ndiv = n / num_threads;

    for (size_t t = 0; t < num_threads; t++) {
      workers.emplace_back(std::thread([&, t]() {
        size_t si = t * ndiv;
        size_t ei = (t == (num_threads - 1)) ? n : std::min((t + 1) * ndiv, size_t(n));

        for (size_t k = si; k < ei; k++) {
          indices_[k] = static_cast<unsigned int>(k);
        }
      }));
    }

    for (auto& t : workers) {
      t.join();
    }
  }

#else

#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < static_cast<int>(n); i++) {
    indices_[static_cast<size_t>(i)] = static_cast<unsigned int>(i);
  }
#endif  // !NANORT_USE_CPP11_FEATURE

  //
  // 2. Compute bounding box (optional).
  //
  real3<T> bmin, bmax;

  if (options.cache_bbox) {
    // Cache every primitive's box in bboxes_ (so GetBoundingBox() during the
    // tree build becomes a table lookup) while folding the scene bounds.
    bmin[0] = bmin[1] = bmin[2] = std::numeric_limits<T>::max();
    bmax[0] = bmax[1] = bmax[2] = -std::numeric_limits<T>::max();

    bboxes_.resize(n);

    for (size_t i = 0; i < n; i++) {  // for each primitive
      unsigned int idx = indices_[i];

      BBox<T> bbox;
      p.BoundingBox(&(bbox.bmin), &(bbox.bmax), static_cast<unsigned int>(i));
      // indices_ still holds the identity permutation here, so idx == i.
      bboxes_[idx] = bbox;

      // xyz
      for (int k = 0; k < 3; k++) {
        bmin[k] = std::min(bmin[k], bbox.bmin[k]);
        bmax[k] = std::max(bmax[k], bbox.bmax[k]);
      }
    }

  }
  else {
#if defined(NANORT_USE_CPP11_FEATURE)
    ComputeBoundingBoxThreaded(&bmin, &bmax, &indices_.at(0), 0, n, p);
#elif defined(_OPENMP)
    ComputeBoundingBoxOMP(&bmin, &bmax, &indices_.at(0), 0, n, p);
#else
    ComputeBoundingBox(&bmin, &bmax, &indices_.at(0), 0, n, p);
#endif
  }

  //
  // 3. Build tree
  //
#if defined(NANORT_ENABLE_PARALLEL_BUILD)
#if defined(NANORT_USE_CPP11_FEATURE)

  // Do parallel build for large enough datasets.
  if (n > options.min_primitives_for_parallel_build) {
    // Build the top of the tree single-threaded; subtrees below
    // options.shallow_depth are recorded in shallow_node_infos_.
    BuildShallowTree(&nodes_, 0, n, /* root depth */ 0, options.shallow_depth,
                     p, pred);  // [0, n)

    assert(shallow_node_infos_.size() > 0);

    // Build deeper tree in parallel
    std::vector<std::vector<BVHNode<T> > > local_nodes(
        shallow_node_infos_.size());
    std::vector<BVHBuildStatistics> local_stats(shallow_node_infos_.size());

    size_t num_threads = std::min(
        size_t(kNANORT_MAX_THREADS),
        std::max(size_t(1), size_t(std::thread::hardware_concurrency())));
    if (shallow_node_infos_.size() < num_threads) {
      num_threads = shallow_node_infos_.size();
    }

    std::vector<std::thread> workers;
    std::atomic<uint32_t> i(0);

    // Work distribution: each worker pops the next pending subtree index
    // from the shared atomic counter until all subtrees are built.
    for (size_t t = 0; t < num_threads; t++) {
      workers.emplace_back(std::thread([&]() {
        uint32_t idx = 0;
        while ((idx = (i++)) < shallow_node_infos_.size()) {
          // Create thread-local copy of Pred since some mutable variables are
          // modified during SAH computation.
          const Pred local_pred = pred;
          unsigned int left_idx = shallow_node_infos_[size_t(idx)].left_idx;
          unsigned int right_idx = shallow_node_infos_[size_t(idx)].right_idx;
          BuildTree(&(local_stats[size_t(idx)]), &(local_nodes[size_t(idx)]),
                    left_idx, right_idx, options.shallow_depth, p, local_pred);
        }
      }));
    }

    for (auto& t : workers) {
      t.join();
    }

    // Join local nodes
    for (size_t ii = 0; ii < local_nodes.size(); ii++) {
      assert(!local_nodes[ii].empty());
      size_t offset = nodes_.size();

      // Add offset to child index (for branch node).
      // `offset - 1` because each subtree's root replaces the placeholder
      // already present in nodes_ instead of being appended.
      for (size_t j = 0; j < local_nodes[ii].size(); j++) {
        if (local_nodes[ii][j].flag == 0) {  // branch
          local_nodes[ii][j].data[0] += offset - 1;
          local_nodes[ii][j].data[1] += offset - 1;
        }
      }

      // replace
      nodes_[shallow_node_infos_[ii].offset] = local_nodes[ii][0];

      // Skip root element of the local node.
      nodes_.insert(nodes_.end(), local_nodes[ii].begin() + 1,
                    local_nodes[ii].end());
    }

    // Join statistics
    for (size_t ii = 0; ii < local_nodes.size(); ii++) {
      stats_.max_tree_depth =
          std::max(stats_.max_tree_depth, local_stats[ii].max_tree_depth);
      stats_.num_leaf_nodes += local_stats[ii].num_leaf_nodes;
      stats_.num_branch_nodes += local_stats[ii].num_branch_nodes;
    }

  }
  else {
    // Single thread.
    BuildTree(&stats_, &nodes_, 0, n,
              /* root depth */ 0, p, pred);  // [0, n)
  }

#elif defined(_OPENMP)

  // Do parallel build for large enough datasets.
  if (n > options.min_primitives_for_parallel_build) {
    BuildShallowTree(&nodes_, 0, n, /* root depth */ 0, options.shallow_depth,
                     p, pred);  // [0, n)

    assert(shallow_node_infos_.size() > 0);

    // Build deeper tree in parallel
    std::vector<std::vector<BVHNode<T> > > local_nodes(
        shallow_node_infos_.size());
    std::vector<BVHBuildStatistics> local_stats(shallow_node_infos_.size());

#pragma omp parallel for
    for (int i = 0; i < static_cast<int>(shallow_node_infos_.size()); i++) {
      unsigned int left_idx = shallow_node_infos_[size_t(i)].left_idx;
      unsigned int right_idx = shallow_node_infos_[size_t(i)].right_idx;
      // Thread-local copy of Pred: its mutable state is modified during the
      // SAH computation.
      const Pred local_pred = pred;
      BuildTree(&(local_stats[size_t(i)]), &(local_nodes[size_t(i)]), left_idx,
                right_idx, options.shallow_depth, p, local_pred);
    }

    // Join local nodes
    for (size_t i = 0; i < local_nodes.size(); i++) {
      assert(!local_nodes[size_t(i)].empty());
      size_t offset = nodes_.size();

      // Add offset to child index (for branch node).
      // `offset - 1` because each subtree's root replaces the placeholder
      // already present in nodes_ instead of being appended.
      for (size_t j = 0; j < local_nodes[i].size(); j++) {
        if (local_nodes[i][j].flag == 0) {  // branch
          local_nodes[i][j].data[0] += offset - 1;
          local_nodes[i][j].data[1] += offset - 1;
        }
      }

      // replace
      nodes_[shallow_node_infos_[i].offset] = local_nodes[i][0];

      // Skip root element of the local node.
      nodes_.insert(nodes_.end(), local_nodes[i].begin() + 1,
                    local_nodes[i].end());
    }

    // Join statistics
    for (size_t i = 0; i < local_nodes.size(); i++) {
      stats_.max_tree_depth =
          std::max(stats_.max_tree_depth, local_stats[i].max_tree_depth);
      stats_.num_leaf_nodes += local_stats[i].num_leaf_nodes;
      stats_.num_branch_nodes += local_stats[i].num_branch_nodes;
    }

  }
  else {
    // Single thread
    BuildTree(&stats_, &nodes_, 0, n,
              /* root depth */ 0, p, pred);  // [0, n)
  }

#else  // parallel build enabled, but neither C++11 threads nor OpenMP
  {
    BuildTree(&stats_, &nodes_, 0, n,
              /* root depth */ 0, p, pred);  // [0, n)
  }
#endif
#else  // !NANORT_ENABLE_PARALLEL_BUILD

  // Single thread BVH build
  {
    BuildTree(&stats_, &nodes_, 0, n,
              /* root depth */ 0, p, pred);  // [0, n)
  }
#endif

  return true;
}
2166
2167 template <typename T>
2169 for (size_t i = 0; i < indices_.size(); i++) {
2170 printf("index[%d] = %d\n", int(i), int(indices_[i]));
2171 }
2172
2173 for (size_t i = 0; i < nodes_.size(); i++) {
2174 printf("node[%d] : bmin %f, %f, %f, bmax %f, %f, %f\n", int(i),
2175 nodes_[i].bmin[0], nodes_[i].bmin[1], nodes_[i].bmin[1],
2176 nodes_[i].bmax[0], nodes_[i].bmax[1], nodes_[i].bmax[1]);
2177 }
2178 }
2179
2180#if defined(NANORT_ENABLE_SERIALIZATION)
2181 template <typename T>
2182 bool BVHAccel<T>::Dump(const char* filename) const {
2183 FILE* fp = fopen(filename, "wb");
2184 if (!fp) {
2185 // fprintf(stderr, "[BVHAccel] Cannot write a file: %s\n", filename);
2186 return false;
2187 }
2188
2189 size_t numNodes = nodes_.size();
2190 assert(nodes_.size() > 0);
2191
2192 size_t numIndices = indices_.size();
2193
2194 size_t r = 0;
2195 r = fwrite(&numNodes, sizeof(size_t), 1, fp);
2196 assert(r == 1);
2197
2198 r = fwrite(&nodes_.at(0), sizeof(BVHNode<T>), numNodes, fp);
2199 assert(r == numNodes);
2200
2201 r = fwrite(&numIndices, sizeof(size_t), 1, fp);
2202 assert(r == 1);
2203
2204 r = fwrite(&indices_.at(0), sizeof(unsigned int), numIndices, fp);
2205 assert(r == numIndices);
2206
2207 fclose(fp);
2208
2209 return true;
2210 }
2211
2212 template <typename T>
2213 bool BVHAccel<T>::Dump(FILE* fp) const {
2214 size_t numNodes = nodes_.size();
2215 assert(nodes_.size() > 0);
2216
2217 size_t numIndices = indices_.size();
2218
2219 size_t r = 0;
2220 r = fwrite(&numNodes, sizeof(size_t), 1, fp);
2221 assert(r == 1);
2222
2223 r = fwrite(&nodes_.at(0), sizeof(BVHNode<T>), numNodes, fp);
2224 assert(r == numNodes);
2225
2226 r = fwrite(&numIndices, sizeof(size_t), 1, fp);
2227 assert(r == 1);
2228
2229 r = fwrite(&indices_.at(0), sizeof(unsigned int), numIndices, fp);
2230 assert(r == numIndices);
2231
2232 return true;
2233 }
2234
2235 template <typename T>
2236 bool BVHAccel<T>::Load(const char* filename) {
2237 FILE* fp = fopen(filename, "rb");
2238 if (!fp) {
2239 // fprintf(stderr, "Cannot open file: %s\n", filename);
2240 return false;
2241 }
2242
2243 size_t numNodes;
2244 size_t numIndices;
2245
2246 size_t r = 0;
2247 r = fread(&numNodes, sizeof(size_t), 1, fp);
2248 assert(r == 1);
2249 assert(numNodes > 0);
2250
2251 nodes_.resize(numNodes);
2252 r = fread(&nodes_.at(0), sizeof(BVHNode<T>), numNodes, fp);
2253 assert(r == numNodes);
2254
2255 r = fread(&numIndices, sizeof(size_t), 1, fp);
2256 assert(r == 1);
2257
2258 indices_.resize(numIndices);
2259
2260 r = fread(&indices_.at(0), sizeof(unsigned int), numIndices, fp);
2261 assert(r == numIndices);
2262
2263 fclose(fp);
2264
2265 return true;
2266 }
2267
2268 template <typename T>
2269 bool BVHAccel<T>::Load(FILE* fp) {
2270 size_t numNodes;
2271 size_t numIndices;
2272
2273 size_t r = 0;
2274 r = fread(&numNodes, sizeof(size_t), 1, fp);
2275 assert(r == 1);
2276 assert(numNodes > 0);
2277
2278 nodes_.resize(numNodes);
2279 r = fread(&nodes_.at(0), sizeof(BVHNode<T>), numNodes, fp);
2280 assert(r == numNodes);
2281
2282 r = fread(&numIndices, sizeof(size_t), 1, fp);
2283 assert(r == 1);
2284
2285 indices_.resize(numIndices);
2286
2287 r = fread(&indices_.at(0), sizeof(unsigned int), numIndices, fp);
2288 assert(r == numIndices);
2289
2290 return true;
2291 }
2292#endif
2293
// Ray vs. AABB slab test (robust traversal variant); specialized below for
// float and double. On a hit, *tminOut/*tmaxOut receive the entry/exit
// distances clipped against the ray's [min_t, max_t] interval.
template <typename T>
inline bool IntersectRayAABB(T* tminOut,  // [out]
                             T* tmaxOut,  // [out]
                             T min_t, T max_t, const T bmin[3], const T bmax[3],
                             real3<T> ray_org, real3<T> ray_inv_dir,
                             int ray_dir_sign[3]);
// Robust ray/AABB slab intersection, float specialization
// (Ize, "Robust BVH Ray Traversal", JCGT 2013 -- see link above).
template <>
inline bool IntersectRayAABB<float>(float* tminOut,  // [out]
                                    float* tmaxOut,  // [out]
                                    float min_t, float max_t,
                                    const float bmin[3], const float bmax[3],
                                    real3<float> ray_org,
                                    real3<float> ray_inv_dir,
                                    int ray_dir_sign[3]) {
  float tmin, tmax;

  // Select the near/far slab per axis from the precomputed direction signs,
  // avoiding per-axis branching in the distance computations below.
  const float min_x = ray_dir_sign[0] ? bmax[0] : bmin[0];
  const float min_y = ray_dir_sign[1] ? bmax[1] : bmin[1];
  const float min_z = ray_dir_sign[2] ? bmax[2] : bmin[2];
  const float max_x = ray_dir_sign[0] ? bmin[0] : bmax[0];
  const float max_y = ray_dir_sign[1] ? bmin[1] : bmax[1];
  const float max_z = ray_dir_sign[2] ? bmin[2] : bmax[2];

  // X
  const float tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0];
  // MaxMult robust BVH traversal(up to 4 ulp).
  // 1.0000000000000004 for double precision.
  const float tmax_x = (max_x - ray_org[0]) * ray_inv_dir[0] * 1.00000024f;

  // Y
  const float tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1];
  const float tmax_y = (max_y - ray_org[1]) * ray_inv_dir[1] * 1.00000024f;

  // Z
  const float tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2];
  const float tmax_z = (max_z - ray_org[2]) * ray_inv_dir[2] * 1.00000024f;

  // safemax/safemin keep min_t/max_t as the innermost operand so that a NaN
  // slab distance (from 0 * inf when the ray origin lies on a slab it is
  // parallel to) appears only as the first argument and is discarded.
  tmin = safemax(tmin_z, safemax(tmin_y, safemax(tmin_x, min_t)));
  tmax = safemin(tmax_z, safemin(tmax_y, safemin(tmax_x, max_t)));

  if (tmin <= tmax) {
    (*tminOut) = tmin;
    (*tmaxOut) = tmax;

    return true;
  }
  return false;  // no hit
}
2342
// Ray vs. AABB slab test, double specialization.
// Identical in structure to the float version; the "MaxMult" robustness
// factor is the double-precision constant 1.0000000000000004 (up to 4 ulp).
template <>
inline bool IntersectRayAABB<double>(double* tminOut, // [out]
                                     double* tmaxOut, // [out]
                                     double min_t, double max_t,
                                     const double bmin[3], const double bmax[3],
                                     real3<double> ray_org,
                                     real3<double> ray_inv_dir,
                                     int ray_dir_sign[3]) {
  double tmin, tmax;

  // Pick near/far slab planes per axis based on the ray direction sign.
  const double min_x = ray_dir_sign[0] ? bmax[0] : bmin[0];
  const double min_y = ray_dir_sign[1] ? bmax[1] : bmin[1];
  const double min_z = ray_dir_sign[2] ? bmax[2] : bmin[2];
  const double max_x = ray_dir_sign[0] ? bmin[0] : bmax[0];
  const double max_y = ray_dir_sign[1] ? bmin[1] : bmax[1];
  const double max_z = ray_dir_sign[2] ? bmin[2] : bmax[2];

  // X
  const double tmin_x = (min_x - ray_org[0]) * ray_inv_dir[0];
  // MaxMult robust BVH traversal(up to 4 ulp).
  const double tmax_x =
      (max_x - ray_org[0]) * ray_inv_dir[0] * 1.0000000000000004;

  // Y
  const double tmin_y = (min_y - ray_org[1]) * ray_inv_dir[1];
  const double tmax_y =
      (max_y - ray_org[1]) * ray_inv_dir[1] * 1.0000000000000004;

  // Z
  const double tmin_z = (min_z - ray_org[2]) * ray_inv_dir[2];
  const double tmax_z =
      (max_z - ray_org[2]) * ray_inv_dir[2] * 1.0000000000000004;

  // safemin/safemax clamp the per-axis intervals against [min_t, max_t];
  // the nesting order matters for NaN handling, so keep it as-is.
  tmin = safemax(tmin_z, safemax(tmin_y, safemax(tmin_x, min_t)));
  tmax = safemin(tmax_z, safemin(tmax_y, safemin(tmax_x, max_t)));

  if (tmin <= tmax) {
    (*tminOut) = tmin;
    (*tmaxOut) = tmax;

    return true;
  }
  return false; // no hit
}
2387
2388 template <typename T>
2389 template <class I>
2390 inline bool BVHAccel<T>::TestLeafNode(const BVHNode<T>& node, const Ray<T>& ray,
2391 const I& intersector) const {
2392 bool hit = false;
2393
2394 unsigned int num_primitives = node.data[0];
2395 unsigned int offset = node.data[1];
2396
2397 T t = intersector.GetT(); // current hit distance
2398
2399 real3<T> ray_org;
2400 ray_org[0] = ray.org[0];
2401 ray_org[1] = ray.org[1];
2402 ray_org[2] = ray.org[2];
2403
2404 real3<T> ray_dir;
2405 ray_dir[0] = ray.dir[0];
2406 ray_dir[1] = ray.dir[1];
2407 ray_dir[2] = ray.dir[2];
2408
2409 for (unsigned int i = 0; i < num_primitives; i++) {
2410 unsigned int prim_idx = indices_[i + offset];
2411
2412 T local_t = t;
2413 if (intersector.Intersect(&local_t, prim_idx)) {
2414 // Update isect state
2415 t = local_t;
2416
2417 intersector.Update(t, prim_idx);
2418 hit = true;
2419 }
2420 }
2421
2422 return hit;
2423 }
2424
#if 0 // TODO(LTE): Implement
  // Multi-hit leaf test: gathers up to `max_intersections` hits in a
  // priority queue whose top entry is the furthest accepted hit.
  //
  // NOTE(review): the first line of this signature (the function name,
  // presumably `bool BVHAccel<T>::MultiHitTestLeafNode(`) was lost in
  // extraction; the parameter list below starts mid-declaration.
  template <typename T> template<class I, class H, class Comp>
      std::priority_queue<H, std::vector<H>, Comp>* isect_pq,
      int max_intersections,
      const BVHNode<T>& node,
      const Ray<T>& ray,
      const I& intersector) const {
  bool hit = false;

  unsigned int num_primitives = node.data[0];
  unsigned int offset = node.data[1];

  // Prune threshold: once the queue is full, only hits nearer than the
  // current furthest queued hit are of interest.
  T t = std::numeric_limits<T>::max();
  if (isect_pq->size() >= static_cast<size_t>(max_intersections)) {
    t = isect_pq->top().t; // current furthest hit distance
  }

  // NOTE(review): ray_org/ray_dir below are computed but never used.
  real3<T> ray_org;
  ray_org[0] = ray.org[0];
  ray_org[1] = ray.org[1];
  ray_org[2] = ray.org[2];

  real3<T> ray_dir;
  ray_dir[0] = ray.dir[0];
  ray_dir[1] = ray.dir[1];
  ray_dir[2] = ray.dir[2];

  for (unsigned int i = 0; i < num_primitives; i++) {
    unsigned int prim_idx = indices_[i + offset];

    T local_t = t, u = 0.0f, v = 0.0f;

    if (intersector.Intersect(&local_t, &u, &v, prim_idx))
    {
      // Update isect state
      if ((local_t > ray.min_t))
      {
        if (isect_pq->size() < static_cast<size_t>(max_intersections))
        {
          H isect;
          t = local_t;
          isect.t = t;
          isect.u = u;
          isect.v = v;
          isect.prim_id = prim_idx;
          isect_pq->push(isect);

          // Update t to furthest distance.
          t = ray.max_t;

          hit = true;
        }
        else if (local_t < isect_pq->top().t)
        {
          // delete furthest intersection and add new intersection.
          isect_pq->pop();

          // NOTE(review): `H hit;` shadows the outer `bool hit`, so the
          // `hit = true;` further down assigns to this H object rather
          // than setting the function's return flag. Rename this local
          // before enabling this code.
          H hit;
          hit.t = local_t;
          hit.u = u;
          hit.v = v;
          hit.prim_id = prim_idx;
          isect_pq->push(hit);

          // Update furthest hit distance
          t = isect_pq->top().t;

          hit = true;
        }
      }
    }
  }

  return hit;
  }
#endif
2502
2503 template <typename T>
2504 template <class I, class H>
2505 bool BVHAccel<T>::Traverse(const Ray<T>& ray, const I& intersector, H* isect,
2506 const BVHTraceOptions& options) const {
2507 const int kMaxStackDepth = 512;
2508 (void)kMaxStackDepth;
2509
2510 T hit_t = ray.max_t;
2511
2512 int node_stack_index = 0;
2513 unsigned int node_stack[512];
2514 node_stack[0] = 0;
2515
2516 // Init isect info as no hit
2517 intersector.Update(hit_t, static_cast<unsigned int>(-1));
2518
2519 intersector.PrepareTraversal(ray, options);
2520
2521 int dir_sign[3];
2522 dir_sign[0] = ray.dir[0] < static_cast<T>(0.0) ? 1 : 0;
2523 dir_sign[1] = ray.dir[1] < static_cast<T>(0.0) ? 1 : 0;
2524 dir_sign[2] = ray.dir[2] < static_cast<T>(0.0) ? 1 : 0;
2525
2526 real3<T> ray_inv_dir;
2527 real3<T> ray_dir;
2528 ray_dir[0] = ray.dir[0];
2529 ray_dir[1] = ray.dir[1];
2530 ray_dir[2] = ray.dir[2];
2531
2532 ray_inv_dir = vsafe_inverse(ray_dir);
2533
2534 real3<T> ray_org;
2535 ray_org[0] = ray.org[0];
2536 ray_org[1] = ray.org[1];
2537 ray_org[2] = ray.org[2];
2538
2539 T min_t = std::numeric_limits<T>::max();
2540 T max_t = -std::numeric_limits<T>::max();
2541
2542 while (node_stack_index >= 0) {
2543 unsigned int index = node_stack[node_stack_index];
2544 const BVHNode<T>& node = nodes_[index];
2545
2546 node_stack_index--;
2547
2548 bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin,
2549 node.bmax, ray_org, ray_inv_dir, dir_sign);
2550
2551 if (hit) {
2552 // Branch node
2553 if (node.flag == 0) {
2554 int order_near = dir_sign[node.axis];
2555 int order_far = 1 - order_near;
2556
2557 // Traverse near first.
2558 node_stack[++node_stack_index] = node.data[order_far];
2559 node_stack[++node_stack_index] = node.data[order_near];
2560 }
2561 else if (TestLeafNode(node, ray, intersector)) { // Leaf node
2562 hit_t = intersector.GetT();
2563 }
2564 }
2565 }
2566
2567 assert(node_stack_index < kNANORT_MAX_STACK_DEPTH);
2568
2569 bool hit = (intersector.GetT() < ray.max_t);
2570 intersector.PostTraversal(ray, hit, isect);
2571
2572 return hit;
2573 }
2574
2575 template <typename T>
2576 template <class I>
2578 const BVHNode<T>& node, const Ray<T>& ray, const int max_intersections,
2579 const I& intersector,
2580 std::priority_queue<NodeHit<T>, std::vector<NodeHit<T> >,
2581 NodeHitComparator<T> >* isect_pq) const {
2582 bool hit = false;
2583
2584 unsigned int num_primitives = node.data[0];
2585 unsigned int offset = node.data[1];
2586
2587 real3<T> ray_org;
2588 ray_org[0] = ray.org[0];
2589 ray_org[1] = ray.org[1];
2590 ray_org[2] = ray.org[2];
2591
2592 real3<T> ray_dir;
2593 ray_dir[0] = ray.dir[0];
2594 ray_dir[1] = ray.dir[1];
2595 ray_dir[2] = ray.dir[2];
2596
2597 intersector.PrepareTraversal(ray);
2598
2599 for (unsigned int i = 0; i < num_primitives; i++) {
2600 unsigned int prim_idx = indices_[i + offset];
2601
2602 T min_t, max_t;
2603
2604 if (intersector.Intersect(&min_t, &max_t, prim_idx)) {
2605 // Always add to isect lists.
2606 NodeHit<T> isect;
2607 isect.t_min = min_t;
2608 isect.t_max = max_t;
2609 isect.node_id = prim_idx;
2610
2611 if (isect_pq->size() < static_cast<size_t>(max_intersections)) {
2612 isect_pq->push(isect);
2613 }
2614 else if (min_t < isect_pq->top().t_min) {
2615 // delete the furthest intersection and add a new intersection.
2616 isect_pq->pop();
2617
2618 isect_pq->push(isect);
2619 }
2620 }
2621 }
2622
2623 return hit;
2624 }
2625
2626 template <typename T>
2627 template <class I>
2629 const Ray<T>& ray, int max_intersections, const I& intersector,
2630 StackVector<NodeHit<T>, 128>* hits) const {
2631 const int kMaxStackDepth = 512;
2632
2633 T hit_t = ray.max_t;
2634
2635 int node_stack_index = 0;
2636 unsigned int node_stack[512];
2637 node_stack[0] = 0;
2638
2639 // Stores furthest intersection at top
2640 std::priority_queue<NodeHit<T>, std::vector<NodeHit<T> >,
2642 isect_pq;
2643
2644 (*hits)->clear();
2645
2646 int dir_sign[3];
2647 dir_sign[0] = ray.dir[0] < static_cast<T>(0.0) ? 1 : 0;
2648 dir_sign[1] = ray.dir[1] < static_cast<T>(0.0) ? 1 : 0;
2649 dir_sign[2] = ray.dir[2] < static_cast<T>(0.0) ? 1 : 0;
2650
2651 real3<T> ray_inv_dir;
2652 real3<T> ray_dir;
2653
2654 ray_dir[0] = ray.dir[0];
2655 ray_dir[1] = ray.dir[1];
2656 ray_dir[2] = ray.dir[2];
2657
2658 ray_inv_dir = vsafe_inverse(ray_dir);
2659
2660 real3<T> ray_org;
2661 ray_org[0] = ray.org[0];
2662 ray_org[1] = ray.org[1];
2663 ray_org[2] = ray.org[2];
2664
2665 T min_t, max_t;
2666
2667 while (node_stack_index >= 0) {
2668 unsigned int index = node_stack[node_stack_index];
2669 const BVHNode<T>& node = nodes_[static_cast<size_t>(index)];
2670
2671 node_stack_index--;
2672
2673 bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin,
2674 node.bmax, ray_org, ray_inv_dir, dir_sign);
2675
2676 if (hit) {
2677 // Branch node
2678 if (node.flag == 0) {
2679 int order_near = dir_sign[node.axis];
2680 int order_far = 1 - order_near;
2681
2682 // Traverse near first.
2683 node_stack[++node_stack_index] = node.data[order_far];
2684 node_stack[++node_stack_index] = node.data[order_near];
2685 }
2686 else { // Leaf node
2687 TestLeafNodeIntersections(node, ray, max_intersections, intersector,
2688 &isect_pq);
2689 }
2690 }
2691 }
2692
2693 assert(node_stack_index < kMaxStackDepth);
2694 (void)kMaxStackDepth;
2695
2696 if (!isect_pq.empty()) {
2697 // Store intesection in reverse order (make it frontmost order)
2698 size_t n = isect_pq.size();
2699 (*hits)->resize(n);
2700
2701 for (size_t i = 0; i < n; i++) {
2702 const NodeHit<T>& isect = isect_pq.top();
2703 (*hits)[n - i - 1] = isect;
2704 isect_pq.pop();
2705 }
2706
2707 return true;
2708 }
2709
2710 return false;
2711 }
2712
#if 0 // TODO(LTE): Implement
  // Multi-hit BVH traversal: collects up to `max_intersections` hits,
  // returned front-most first in `hits`. Disabled pending implementation
  // of MultiHitTestLeafNode.
  template <typename T> template<class I, class H, class Comp>
  bool BVHAccel<T>::MultiHitTraverse(const Ray<T>& ray,
                                     int max_intersections,
                                     const I& intersector,
                                     StackVector<H, 128>* hits,
                                     const BVHTraceOptions& options) const {
  const int kMaxStackDepth = 512;

  T hit_t = ray.max_t;

  int node_stack_index = 0;
  unsigned int node_stack[512];
  node_stack[0] = 0;

  // Stores furthest intersection at top
  std::priority_queue<H, std::vector<H>, Comp> isect_pq;

  (*hits)->clear();

  // Init isect info as no hit
  intersector.Update(hit_t, static_cast<unsigned int>(-1));

  intersector.PrepareTraversal(ray, options);

  // NOTE(review): dir_sign is int[3] but the assignments below cast the
  // 1/0 results to T (float/double) first — inconsistent with the live
  // Traverse(), which uses plain `? 1 : 0`. Harmless for these values but
  // should be cleaned up before enabling this code.
  int dir_sign[3];
  dir_sign[0] = ray.dir[0] < static_cast<T>(0.0) ? static_cast<T>(1) : static_cast<T>(0);
  dir_sign[1] = ray.dir[1] < static_cast<T>(0.0) ? static_cast<T>(1) : static_cast<T>(0);
  dir_sign[2] = ray.dir[2] < static_cast<T>(0.0) ? static_cast<T>(1) : static_cast<T>(0);

  real3<T> ray_inv_dir;
  real3<T> ray_dir;

  ray_dir[0] = ray.dir[0];
  ray_dir[1] = ray.dir[1];
  ray_dir[2] = ray.dir[2];

  ray_inv_dir = vsafe_inverse(ray_dir);

  real3<T> ray_org;
  ray_org[0] = ray.org[0];
  ray_org[1] = ray.org[1];
  ray_org[2] = ray.org[2];

  T min_t, max_t;

  while (node_stack_index >= 0)
  {
    unsigned int index = node_stack[node_stack_index];
    const BVHNode<T>& node = nodes_[static_cast<size_t>(index)];

    node_stack_index--;

    bool hit = IntersectRayAABB(&min_t, &max_t, ray.min_t, hit_t, node.bmin,
                                node.bmax, ray_org, ray_inv_dir, dir_sign);

    // branch node
    if (hit)
    {
      if (node.flag == 0)
      {
        int order_near = dir_sign[node.axis];
        int order_far = 1 - order_near;

        // Traverse near first.
        node_stack[++node_stack_index] = node.data[order_far];
        node_stack[++node_stack_index] = node.data[order_near];
      }
      else
      {
        if (MultiHitTestLeafNode(&isect_pq, max_intersections, node, ray, intersector))
        {
          // Only update `hit_t` when queue is full.
          if (isect_pq.size() >= static_cast<size_t>(max_intersections))
          {
            hit_t = isect_pq.top().t;
          }
        }
      }
    }
  }

  assert(node_stack_index < kMaxStackDepth);
  (void)kMaxStackDepth;

  if (!isect_pq.empty())
  {
    // Store intersections in reverse pop order (make it frontmost order)
    size_t n = isect_pq.size();
    (*hits)->resize(n);

    for (size_t i = 0; i < n; i++)
    {
      const H& isect = isect_pq.top();
      (*hits)[n - i - 1] = isect;
      isect_pq.pop();
    }

    return true;
  }

  return false;
  }
#endif
2817
2818#ifdef __clang__
2819#pragma clang diagnostic pop
2820#endif
2821
2822} // namespace nanort
2823
2824#endif // NANORT_H_
#define kNANORT_MAX_STACK_DEPTH
Definition: nanort.h:70
#define kNANORT_MIN_PRIMITIVES_FOR_PARALLEL_BUILD
Definition: nanort.h:71
#define kNANORT_SHALLOW_DEPTH
Definition: nanort.h:72
STL namespace.
Definition: nanort.h:91
void ComputeBoundingBox(real3< T > *bmin, real3< T > *bmax, const unsigned int *indices, unsigned int left_index, unsigned int right_index, const P &p)
Definition: nanort.h:1553
T CalculateSurfaceArea(const real3< T > &min, const real3< T > &max)
Definition: nanort.h:1216
T vdot(const real3< T > a, const real3< T > b)
Definition: nanort.h:410
T SAH(size_t ns1, T leftArea, size_t ns2, T rightArea, T invS, T Taabb, T Ttri)
Definition: nanort.h:1322
real3< T > vnormalize(const real3< T > &rhs)
Definition: nanort.h:388
const T & safemin(const T &a, const T &b)
Definition: nanort.h:1190
const T & safemax(const T &a, const T &b)
Definition: nanort.h:1194
void GetBoundingBoxOfTriangle(real3< T > *bmin, real3< T > *bmax, const T *vertices, const unsigned int *faces, unsigned int index)
Definition: nanort.h:1223
real3< T > vcross(const real3< T > a, const real3< T > b)
Definition: nanort.h:401
void GetBoundingBox(real3< T > *bmin, real3< T > *bmax, const std::vector< BBox< T > > &bboxes, unsigned int *indices, unsigned int left_index, unsigned int right_index)
Definition: nanort.h:1577
void ContributeBinBuffer(BinBuffer *bins, const real3< T > &scene_min, const real3< T > &scene_max, unsigned int *indices, unsigned int left_idx, unsigned int right_idx, const P &p)
Definition: nanort.h:1252
RayType
Definition: nanort.h:94
@ RAY_TYPE_REFRACTION
Definition: nanort.h:100
@ RAY_TYPE_NONE
Definition: nanort.h:95
@ RAY_TYPE_SECONDARY
Definition: nanort.h:97
@ RAY_TYPE_DIFFUSE
Definition: nanort.h:98
@ RAY_TYPE_REFLECTION
Definition: nanort.h:99
@ RAY_TYPE_PRIMARY
Definition: nanort.h:96
const real * get_vertex_addr(const real *p, const size_t idx, const size_t stride_bytes)
Definition: nanort.h:474
bool IntersectRayAABB< double >(double *tminOut, double *tmaxOut, double min_t, double max_t, const double bmin[3], const double bmax[3], real3< double > ray_org, real3< double > ray_inv_dir, int ray_dir_sign[3])
Definition: nanort.h:2344
bool IntersectRayAABB(T *tminOut, T *tmaxOut, T min_t, T max_t, const T bmin[3], const T bmax[3], real3< T > ray_org, real3< T > ray_inv_dir, int ray_dir_sign[3])
real3< T > vsafe_inverse(const real3< T > v)
Definition: nanort.h:415
T vlength(const real3< T > &rhs)
Definition: nanort.h:383
bool IntersectRayAABB< float >(float *tminOut, float *tmaxOut, float min_t, float max_t, const float bmin[3], const float bmax[3], real3< float > ray_org, real3< float > ray_inv_dir, int ray_dir_sign[3])
Definition: nanort.h:2301
real3< T > operator*(T f, const real3< T > &v)
Definition: nanort.h:373
bool FindCutFromBinBuffer(T *cut_pos, int *minCostAxis, const BinBuffer *bins, const real3< T > &bmin, const real3< T > &bmax, size_t num_primitives, T costTaabb)
Definition: nanort.h:1334
real3< T > vneg(const real3< T > &rhs)
Definition: nanort.h:378
std::allocator< T >::size_type size_type
Definition: nanort.h:141
pointer allocate(size_type n, void *hint=0)
Definition: nanort.h:202
std::allocator< T >::pointer pointer
Definition: nanort.h:140
StackAllocator< U, stack_capacity > other
Definition: nanort.h:175
void deallocate(pointer p, size_type n)
Definition: nanort.h:215
StackAllocator(const StackAllocator< T, stack_capacity > &rhs)
Definition: nanort.h:179
StackAllocator(Source *source)
Definition: nanort.h:197
StackAllocator(const StackAllocator< U, other_capacity > &other)
Definition: nanort.h:192
char stack_buffer_[sizeof(T[stack_capacity])]
Definition: nanort.h:165
const T * stack_buffer() const
Definition: nanort.h:151
ContainerType * operator->()
Definition: nanort.h:260
const ContainerType & container() const
Definition: nanort.h:255
Allocator allocator_
Definition: nanort.h:272
ContainerType::value_type ContainedType
Definition: nanort.h:238
ContainerType container_
Definition: nanort.h:273
const ContainerType * operator->() const
Definition: nanort.h:261
StackContainer(const StackContainer &)
ContainerType & container()
Definition: nanort.h:254
TContainerType ContainerType
Definition: nanort.h:237
void operator=(const StackContainer &)
StackAllocator< ContainedType, stack_capacity > Allocator
Definition: nanort.h:239
Allocator::Source stack_data_
Definition: nanort.h:270
unsigned char pad_[7]
Definition: nanort.h:271
StackVector(const StackVector< T, stack_capacity > &other)
Definition: nanort.h:299
const T & operator[](size_t i) const
Definition: nanort.h:314
StackVector< T, stack_capacity > & operator=(const StackVector< T, stack_capacity > &other)
Definition: nanort.h:305
T & operator[](size_t i)
Definition: nanort.h:313
real3 operator/(const real3 &f2) const
Definition: nanort.h:361
real3 operator*(const real3 &f2) const
Definition: nanort.h:349
T x() const
Definition: nanort.h:341
real3 operator*(T f) const
Definition: nanort.h:345
real3(T xx, T yy, T zz)
Definition: nanort.h:330
T & operator[](int i)
Definition: nanort.h:366
real3 operator-() const
Definition: nanort.h:364
T z() const
Definition: nanort.h:343
real3(T x)
Definition: nanort.h:325
real3(const T *p)
Definition: nanort.h:335
real3 operator+(const real3 &f2) const
Definition: nanort.h:352
T operator[](int i) const
Definition: nanort.h:365
real3 & operator+=(const real3 &f2)
Definition: nanort.h:355
T y() const
Definition: nanort.h:342
real3 operator-(const real3 &f2) const
Definition: nanort.h:346
T dir[3]
Definition: nanort.h:496
unsigned int type
Definition: nanort.h:499
T org[3]
Definition: nanort.h:495
BVHNode(const BVHNode &rhs)
Definition: nanort.h:508
BVHNode & operator=(const BVHNode &rhs)
Definition: nanort.h:523
unsigned int data[2]
Definition: nanort.h:555
bool operator()(const H &a, const H &b) const
Definition: nanort.h:561
BVH build option.
Definition: nanort.h:566
unsigned char pad[3]
Definition: nanort.h:577
unsigned int bin_size
Definition: nanort.h:570
unsigned int min_primitives_for_parallel_build
Definition: nanort.h:572
unsigned int max_tree_depth
Definition: nanort.h:569
unsigned int min_leaf_primitives
Definition: nanort.h:568
unsigned int shallow_depth
Definition: nanort.h:571
BVH build statistics.
Definition: nanort.h:592
unsigned int num_leaf_nodes
Definition: nanort.h:595
unsigned int max_tree_depth
Definition: nanort.h:594
unsigned int num_branch_nodes
Definition: nanort.h:596
BVH trace option.
Definition: nanort.h:610
unsigned int skip_prim_id
Definition: nanort.h:618
unsigned char pad[3]
Padding (not used)
Definition: nanort.h:621
unsigned int prim_ids_range[2]
Definition: nanort.h:614
Bounding box.
Definition: nanort.h:636
real3< T > bmax
Definition: nanort.h:639
real3< T > bmin
Definition: nanort.h:638
Hit class for traversing nodes.
Definition: nanort.h:654
unsigned int node_id
Definition: nanort.h:679
NodeHit & operator=(const NodeHit< T > &rhs)
Definition: nanort.h:667
NodeHit(const NodeHit< T > &rhs)
Definition: nanort.h:661
Comparator object for NodeHit.
Definition: nanort.h:688
bool operator()(const NodeHit< T > &a, const NodeHit< T > &b)
Definition: nanort.h:690
Bounding Volume Hierarchy acceleration.
Definition: nanort.h:705
std::vector< unsigned int > indices_
Definition: nanort.h:862
void Debug()
Definition: nanort.h:2168
bool TestLeafNodeIntersections(const BVHNode< T > &node, const Ray< T > &ray, const int max_intersections, const I &intersector, std::priority_queue< NodeHit< T >, std::vector< NodeHit< T > >, NodeHitComparator< T > > *isect_pq) const
Definition: nanort.h:2577
bool Traverse(const Ray< T > &ray, const I &intersector, H *isect, const BVHTraceOptions &options=BVHTraceOptions()) const
Traverse into BVH along ray and find closest hit point & primitive if found.
Definition: nanort.h:2505
bool IsValid() const
Definition: nanort.h:813
BVHBuildOptions< T > options_
Definition: nanort.h:864
bool ListNodeIntersections(const Ray< T > &ray, int max_intersections, const I &intersector, StackVector< NodeHit< T >, 128 > *hits) const
List up nodes which intersects along the ray. This function is useful for two-level BVH traversal....
Definition: nanort.h:2628
void BoundingBox(T bmin[3], T bmax[3]) const
Returns bounding box of built BVH.
Definition: nanort.h:798
bool Build(const unsigned int num_primitives, const Prim &p, const Pred &pred, const BVHBuildOptions< T > &options=BVHBuildOptions< T >())
Build BVH for input primitives.
Definition: nanort.h:1907
const std::vector< unsigned int > & GetIndices() const
Definition: nanort.h:793
const std::vector< BVHNode< T > > & GetNodes() const
Definition: nanort.h:792
bool TestLeafNode(const BVHNode< T > &node, const Ray< T > &ray, const I &intersector) const
Definition: nanort.h:2390
std::vector< BBox< T > > bboxes_
Definition: nanort.h:863
unsigned int pad0_
Definition: nanort.h:866
unsigned int BuildTree(BVHBuildStatistics *out_stat, std::vector< BVHNode< T > > *out_nodes, unsigned int left_idx, unsigned int right_idx, unsigned int depth, const P &p, const Pred &pred)
Builds BVH tree recursively.
Definition: nanort.h:1771
BVHBuildStatistics stats_
Definition: nanort.h:865
std::vector< BVHNode< T > > nodes_
Definition: nanort.h:861
BVHBuildStatistics GetStatistics() const
Get statistics of built BVH tree. Valid after Build()
Definition: nanort.h:731
const size_t vertex_stride_bytes_
Definition: nanort.h:926
const unsigned int * faces_
Definition: nanort.h:925
void Set(int axis, T pos) const
Definition: nanort.h:899
TriangleSAHPred(const TriangleSAHPred< T > &rhs)
Definition: nanort.h:882
TriangleSAHPred(const T *vertices, const unsigned int *faces, size_t vertex_stride_bytes)
Definition: nanort.h:873
TriangleSAHPred< T > & operator=(const TriangleSAHPred< T > &rhs)
Definition: nanort.h:889
bool operator()(unsigned int i) const
Definition: nanort.h:904
const T * vertices_
Definition: nanort.h:924
TriangleMesh(const T *vertices, const unsigned int *faces, const size_t vertex_stride_bytes)
Definition: nanort.h:933
const size_t vertex_stride_bytes_
Definition: nanort.h:968
const unsigned int * faces_
Definition: nanort.h:967
void BoundingBox(real3< T > *bmin, real3< T > *bmax, unsigned int prim_index) const
Compute bounding box for prim_indexth triangle. This function is called for each primitive in BVH bui...
Definition: nanort.h:942
const T * vertices_
Definition: nanort.h:966
void PrepareTraversal(const Ray< T > &ray, const BVHTraceOptions &trace_options) const
Prepare BVH traversal (e.g. compute inverse ray direction) This function is called only once in BVH t...
Definition: nanort.h:1116
const size_t vertex_stride_bytes_
Definition: nanort.h:1171
TriangleIntersector(const T *vertices, const unsigned int *faces, const size_t vertex_stride_bytes)
Definition: nanort.h:985
void PostTraversal(const Ray< T > &ray, bool hit, H *isect) const
Post BVH traversal stuff. Fill isect if there is a hit.
Definition: nanort.h:1158
T GetT() const
Returns the nearest hit distance.
Definition: nanort.h:1106
void Update(T t, unsigned int prim_idx) const
Update is called when initializing intersection and nearest hit is found.
Definition: nanort.h:1109
BVHTraceOptions trace_options_
Definition: nanort.h:1175
bool Intersect(T *t_inout, const unsigned int prim_index) const
Do ray intersection stuff for prim_index th primitive and return hit distance t, barycentric coordina...
Definition: nanort.h:1007
const unsigned int * faces_
Definition: nanort.h:1170
unsigned int bin_size
Definition: nanort.h:1211
BinBuffer(unsigned int size)
Definition: nanort.h:1202
unsigned int pad0
Definition: nanort.h:1212
std::vector< size_t > bin
Definition: nanort.h:1210