mirror of https://github.com/koide3/small_gicp.git

commit eb709de524: parallelize kdtree construction
parent: 10be36d0c4
CMakeLists.txt

@@ -24,6 +24,8 @@ find_package(TBB REQUIRED)
 find_package(Eigen3 REQUIRED)
 find_package(Iridescence REQUIRED)
 
+set(FAST_GICP_INCLUDE_DIR /home/koide/workspace/fast_gicp/include)
+
 find_package(OpenMP)
 if (OPENMP_FOUND)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")

@@ -70,11 +72,11 @@ add_executable(odometry_benchmark
 )
 target_include_directories(odometry_benchmark PUBLIC
   include
-  /home/koide/workspace/fast_gicp/include
   ${PCL_INCLUDE_DIRS}
   ${TBB_INCLUDE_DIRS}
   ${EIGEN3_INCLUDE_DIR}
   ${Iridescence_INCLUDE_DIRS}
+  ${FAST_GICP_INCLUDE_DIR}
 )
 target_link_libraries(odometry_benchmark
   fmt::fmt

@@ -96,4 +98,19 @@ target_link_libraries(downsampling_benchmark
   fmt::fmt
   ${PCL_LIBRARIES}
   ${TBB_LIBRARIES}
 )
+
+add_executable(sort_test
+  src/sort_test.cpp
+)
+target_include_directories(sort_test PUBLIC
+  include
+  ${PCL_INCLUDE_DIRS}
+  ${TBB_INCLUDE_DIRS}
+  ${EIGEN3_INCLUDE_DIR}
+)
+target_link_libraries(sort_test
+  fmt::fmt
+  ${PCL_LIBRARIES}
+  ${TBB_LIBRARIES}
+)

include/small_gicp/ann/kdtree_mt.hpp (new file)

@@ -0,0 +1,80 @@
#pragma once

#include <Eigen/Core>
#include <small_gicp/points/traits.hpp>
#include <small_gicp/ann/traits.hpp>
#include <small_gicp/ann/nanoflann.hpp>
#include <small_gicp/ann/nanoflann_mt.hpp>

namespace small_gicp {

/// @brief Unsafe KdTree with multi-thread tree construction.
/// @note This class only parallelizes the tree construction. The search is still single-threaded as in the normal KdTree.
template <typename PointCloud>
class UnsafeKdTreeMT {
public:
  using Ptr = std::shared_ptr<UnsafeKdTreeMT>;
  using ConstPtr = std::shared_ptr<const UnsafeKdTreeMT>;
  using Index = nanoflann::KDTreeSingleIndexAdaptorMT<nanoflann::L2_Simple_Adaptor<double, UnsafeKdTreeMT<PointCloud>, double>, UnsafeKdTreeMT<PointCloud>, 3, size_t>;

  /// @brief Constructor
  /// @param points Input points
  UnsafeKdTreeMT(const PointCloud& points, int num_threads = 4) : points(points), index(3, *this, nanoflann::KDTreeSingleIndexAdaptorParams(10)) { index.buildIndex(num_threads); }
  ~UnsafeKdTreeMT() {}

  // Interfaces for nanoflann
  size_t kdtree_get_point_count() const { return traits::size(points); }
  double kdtree_get_pt(const size_t idx, const size_t dim) const { return traits::point(points, idx)[dim]; }

  template <class BBox>
  bool kdtree_get_bbox(BBox&) const {
    return false;
  }

  /// @brief Find k-nearest neighbors
  size_t knn_search(const Eigen::Vector4d& pt, size_t k, size_t* k_indices, double* k_sq_dists) const { return index.knnSearch(pt.data(), k, k_indices, k_sq_dists); }

private:
  const PointCloud& points;  ///< Input points
  Index index;               ///< KdTree index
};

/// @brief KdTree
template <typename PointCloud>
class KdTreeMT {
public:
  using Ptr = std::shared_ptr<KdTreeMT>;
  using ConstPtr = std::shared_ptr<const KdTreeMT>;

  /// @brief Constructor
  /// @param points Input points
  KdTreeMT(const std::shared_ptr<const PointCloud>& points, int num_threads = 4) : points(points), tree(*points, num_threads) {}
  ~KdTreeMT() {}

  /// @brief Find k-nearest neighbors
  size_t knn_search(const Eigen::Vector4d& pt, size_t k, size_t* k_indices, double* k_sq_dists) const { return tree.knn_search(pt, k, k_indices, k_sq_dists); }

private:
  const std::shared_ptr<const PointCloud> points;  ///< Input points
  const UnsafeKdTreeMT<PointCloud> tree;           ///< KdTree
};

namespace traits {

template <typename PointCloud>
struct Traits<UnsafeKdTreeMT<PointCloud>> {
  static size_t knn_search(const UnsafeKdTreeMT<PointCloud>& tree, const Eigen::Vector4d& point, size_t k, size_t* k_indices, double* k_sq_dists) {
    return tree.knn_search(point, k, k_indices, k_sq_dists);
  }
};

template <typename PointCloud>
struct Traits<KdTreeMT<PointCloud>> {
  static size_t knn_search(const KdTreeMT<PointCloud>& tree, const Eigen::Vector4d& point, size_t k, size_t* k_indices, double* k_sq_dists) {
    return tree.knn_search(point, k, k_indices, k_sq_dists);
  }
};

}  // namespace traits

}  // namespace small_gicp
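
A minimal usage sketch of the new KdTreeMT (not part of the diff; it assumes small_gicp's PointCloud type and the traits::resize/traits::set_point helpers that appear elsewhere in this commit):

#include <memory>
#include <cstdio>
#include <small_gicp/points/point_cloud.hpp>
#include <small_gicp/ann/kdtree_mt.hpp>

int main() {
  using namespace small_gicp;

  auto points = std::make_shared<PointCloud>();
  traits::resize(*points, 3);
  traits::set_point(*points, 0, Eigen::Vector4d(0.0, 0.0, 0.0, 1.0));
  traits::set_point(*points, 1, Eigen::Vector4d(1.0, 0.0, 0.0, 1.0));
  traits::set_point(*points, 2, Eigen::Vector4d(0.0, 1.0, 0.0, 1.0));

  // Tree construction runs on 4 threads; queries stay single-threaded.
  KdTreeMT<PointCloud> tree(points, 4);

  size_t k_indices[2];
  double k_sq_dists[2];
  const size_t n = tree.knn_search(Eigen::Vector4d(0.1, 0.0, 0.0, 1.0), 2, k_indices, k_sq_dists);
  std::printf("found %zu neighbors, nearest index = %zu\n", n, k_indices[0]);
  return 0;
}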

include/small_gicp/ann/nanoflann_mt.hpp (new file)

@@ -0,0 +1,649 @@
/***********************************************************************
 * Software License Agreement (BSD License)
 *
 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
 * Copyright 2011-2021 Jose Luis Blanco (joseluisblancoc@gmail.com).
 * All rights reserved.
 *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *************************************************************************/

/**
 * This nanoflann_mt.hpp is derived from nanoflann.hpp to parallelize the tree construction with OpenMP and TBB.
 */

/** \mainpage nanoflann C++ API documentation
 *  nanoflann is a C++ header-only library for building KD-Trees, mostly
 *  optimized for 2D or 3D point clouds.
 *
 *  nanoflann does not require compiling or installing, just an
 *  #include <nanoflann.hpp> in your code.
 *
 *  See:
 *   - <a href="modules.html" >C++ API organized by modules</a>
 *   - <a href="https://github.com/jlblancoc/nanoflann" >Online README</a>
 *   - <a href="http://jlblancoc.github.io/nanoflann/" >Doxygen documentation</a>
 */

#ifndef NANOFLANN_MT_HPP_
#define NANOFLANN_MT_HPP_

#include <tbb/tbb.h>
#include <small_gicp/ann/nanoflann.hpp>
namespace nanoflann {

/** kd-tree base-class
 *
 * Contains the member functions common to the classes KDTreeSingleIndexAdaptor
 * and KDTreeSingleIndexDynamicAdaptor_.
 *
 * \tparam Derived The name of the class which inherits this class.
 * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
 * \tparam Distance The distance metric to use, these are all classes derived from nanoflann::Metric
 * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
 * \tparam IndexType Will be typically size_t or int
 */

template <class Derived, typename Distance, class DatasetAdaptor, int DIM = -1, typename IndexType = size_t>
class KDTreeBaseClassMT {
public:
  /** Frees the previously-built index. Automatically called within buildIndex(). */
  void freeIndex(Derived& obj) {
    obj.root_node = NULL;
    obj.m_size_at_index_build = 0;
  }

  typedef typename Distance::ElementType ElementType;
  typedef typename Distance::DistanceType DistanceType;

  /*--------------------- Internal Data Structures --------------------------*/
  struct Node {
    /** Union used because a node can be either a LEAF node or a non-leaf node,
     *  so both data fields are never used simultaneously */
    union {
      struct leaf {
        IndexType left, right;  //!< Indices of points in leaf node
      } lr;
      struct nonleaf {
        int divfeat;                   //!< Dimension used for subdivision.
        DistanceType divlow, divhigh;  //!< The values used for subdivision.
      } sub;
    } node_type;
    Node *child1, *child2;  //!< Child nodes (both=NULL means it's a leaf node)
  };

  typedef Node* NodePtr;

  struct Interval {
    ElementType low, high;
  };

  /**
   * Array of indices to vectors in the dataset.
   */
  std::vector<IndexType> vind;

  NodePtr root_node;

  size_t m_leaf_max_size;

  size_t m_size;                 //!< Number of current points in the dataset
  size_t m_size_at_index_build;  //!< Number of points in the dataset when the index was built
  int dim;                       //!< Dimensionality of each data point

  /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */
  typedef typename array_or_vector_selector<DIM, Interval>::container_t BoundingBox;

  /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */
  typedef typename array_or_vector_selector<DIM, DistanceType>::container_t distance_vector_t;

  /** The KD-tree used to find neighbours */

  BoundingBox root_bbox;

  /**
   * Pooled memory allocator.
   *
   * Using a pooled memory allocator is more efficient
   * than allocating memory directly when there is a large
   * number of small memory allocations.
   */
  // NOTE: Should remove TBB to minimize the dependencies
  tbb::concurrent_vector<Node> pool;

  /** Returns the number of points in the dataset */
  size_t size(const Derived& obj) const { return obj.m_size; }

  /** Returns the length of each point in the dataset */
  size_t veclen(const Derived& obj) { return static_cast<size_t>(DIM > 0 ? DIM : obj.dim); }

  /// Helper accessor to the dataset points:
  inline ElementType dataset_get(const Derived& obj, size_t idx, int component) const { return obj.dataset.kdtree_get_pt(idx, component); }

  void computeMinMax(const Derived& obj, IndexType* ind, IndexType count, int element, ElementType& min_elem, ElementType& max_elem) {
    min_elem = dataset_get(obj, ind[0], element);
    max_elem = dataset_get(obj, ind[0], element);
    for (IndexType i = 1; i < count; ++i) {
      ElementType val = dataset_get(obj, ind[i], element);
      if (val < min_elem) min_elem = val;
      if (val > max_elem) max_elem = val;
    }
  }

  /**
   * Create a tree node that subdivides the list of vecs from vind[first]
   * to vind[last]. The routine is called recursively on each sublist.
   *
   * @param left index of the first vector
   * @param right index of the last vector
   */
  NodePtr divideTree(Derived& obj, const IndexType left, const IndexType right, BoundingBox& bbox) {
    NodePtr node = &(*pool.emplace_back());

    /* If too few exemplars remain, then make this a leaf node. */
    if ((right - left) <= static_cast<IndexType>(obj.m_leaf_max_size)) {
      node->child1 = node->child2 = NULL; /* Mark as leaf node. */
      node->node_type.lr.left = left;
      node->node_type.lr.right = right;

      // compute bounding-box of leaf points
      for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
        bbox[i].low = dataset_get(obj, obj.vind[left], i);
        bbox[i].high = dataset_get(obj, obj.vind[left], i);
      }
      for (IndexType k = left + 1; k < right; ++k) {
        for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
          if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) bbox[i].low = dataset_get(obj, obj.vind[k], i);
          if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) bbox[i].high = dataset_get(obj, obj.vind[k], i);
        }
      }
    } else {
      IndexType idx;
      int cutfeat;
      DistanceType cutval;
      middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, bbox);

      node->node_type.sub.divfeat = cutfeat;

      BoundingBox left_bbox(bbox);
      left_bbox[cutfeat].high = cutval;

      BoundingBox right_bbox(bbox);
      right_bbox[cutfeat].low = cutval;

      if ((right - left) <= 512) {
        // Do not parallelize if there are less than 512 points
        node->child1 = divideTree(obj, left, left + idx, left_bbox);
        node->child2 = divideTree(obj, left + idx, right, right_bbox);
      } else {
        // I did my best to check that the following parallelization does not cause race conditions.
        // But, I am still not 100% sure that it is correct.
#pragma omp task shared(obj, left_bbox)
        node->child1 = divideTree(obj, left, left + idx, left_bbox);
#pragma omp task shared(obj, right_bbox)
        node->child2 = divideTree(obj, left + idx, right, right_bbox);
#pragma omp taskwait
      }

      node->node_type.sub.divlow = left_bbox[cutfeat].high;
      node->node_type.sub.divhigh = right_bbox[cutfeat].low;

      for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
        bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low);
        bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
      }
    }

    return node;
  }

  void middleSplit_(Derived& obj, IndexType* ind, IndexType count, IndexType& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) {
    const DistanceType EPS = static_cast<DistanceType>(0.00001);
    ElementType max_span = bbox[0].high - bbox[0].low;
    for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) {
      ElementType span = bbox[i].high - bbox[i].low;
      if (span > max_span) {
        max_span = span;
      }
    }
    ElementType max_spread = -1;
    cutfeat = 0;
    for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
      ElementType span = bbox[i].high - bbox[i].low;
      if (span > (1 - EPS) * max_span) {
        ElementType min_elem, max_elem;
        computeMinMax(obj, ind, count, i, min_elem, max_elem);
        ElementType spread = max_elem - min_elem;
        if (spread > max_spread) {
          cutfeat = i;
          max_spread = spread;
        }
      }
    }
    // split in the middle
    DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2;
    ElementType min_elem, max_elem;
    computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem);

    if (split_val < min_elem)
      cutval = min_elem;
    else if (split_val > max_elem)
      cutval = max_elem;
    else
      cutval = split_val;

    IndexType lim1, lim2;
    planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2);

    if (lim1 > count / 2)
      index = lim1;
    else if (lim2 < count / 2)
      index = lim2;
    else
      index = count / 2;
  }

  /**
   * Subdivide the list of points by a plane perpendicular to the axis corresponding
   * to the 'cutfeat' dimension at 'cutval' position.
   *
   * On return:
   *  dataset[ind[0..lim1-1]][cutfeat] < cutval
   *  dataset[ind[lim1..lim2-1]][cutfeat] == cutval
   *  dataset[ind[lim2..count]][cutfeat] > cutval
   */
  void planeSplit(Derived& obj, IndexType* ind, const IndexType count, int cutfeat, DistanceType& cutval, IndexType& lim1, IndexType& lim2) {
    /* Move vector indices for left subtree to front of list. */
    IndexType left = 0;
    IndexType right = count - 1;
    for (;;) {
      while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval) ++left;
      while (right && left <= right && dataset_get(obj, ind[right], cutfeat) >= cutval) --right;
      if (left > right || !right) break;  // "!right" was added to support unsigned Index types
      std::swap(ind[left], ind[right]);
      ++left;
      --right;
    }
    /* If either list is empty, it means that all remaining features
     * are identical. Split in the middle to maintain a balanced tree.
     */
    lim1 = left;
    right = count - 1;
    for (;;) {
      while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval) ++left;
      while (right && left <= right && dataset_get(obj, ind[right], cutfeat) > cutval) --right;
      if (left > right || !right) break;  // "!right" was added to support unsigned Index types
      std::swap(ind[left], ind[right]);
      ++left;
      --right;
    }
    lim2 = left;
  }

  DistanceType computeInitialDistances(const Derived& obj, const ElementType* vec, distance_vector_t& dists) const {
    assert(vec);
    DistanceType distsq = DistanceType();

    for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
      if (vec[i] < obj.root_bbox[i].low) {
        dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i);
        distsq += dists[i];
      }
      if (vec[i] > obj.root_bbox[i].high) {
        dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i);
        distsq += dists[i];
      }
    }
    return distsq;
  }
};

/** @addtogroup kdtrees_grp KD-tree classes and adaptors
 * @{ */

/** kd-tree static index
 *
 * Contains the k-d trees and other information for indexing a set of points
 * for nearest-neighbor matching.
 *
 * The class "DatasetAdaptor" must provide the following interface (can be
 * non-virtual, inlined methods):
 *
 * \code
 * // Must return the number of data points
 * inline size_t kdtree_get_point_count() const { ... }
 *
 * // Must return the dim'th component of the idx'th point in the class:
 * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
 *
 * // Optional bounding-box computation: return false to default to a standard
 * // bbox computation loop.
 * // Return true if the BBOX was already computed by the class and returned
 * // in "bb" so it can be avoided to redo it again.
 * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
 * // for point clouds)
 * template <class BBOX>
 * bool kdtree_get_bbox(BBOX& bb) const {
 *   bb[0].low = ...; bb[0].high = ...;  // 0th dimension limits
 *   bb[1].low = ...; bb[1].high = ...;  // 1st dimension limits
 *   ...
 *   return true;
 * }
 * \endcode
 *
 * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
 * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
 * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
 * \tparam IndexType Will be typically size_t or int
 */
template <typename Distance, class DatasetAdaptor, int DIM = -1, typename IndexType = size_t>
class KDTreeSingleIndexAdaptorMT : public KDTreeBaseClassMT<KDTreeSingleIndexAdaptorMT<Distance, DatasetAdaptor, DIM, IndexType>, Distance, DatasetAdaptor, DIM, IndexType> {
public:
  /** Deleted copy constructor */
  KDTreeSingleIndexAdaptorMT(const KDTreeSingleIndexAdaptorMT<Distance, DatasetAdaptor, DIM, IndexType>&) = delete;

  /**
   * The dataset used by this index
   */
  const DatasetAdaptor& dataset;  //!< The source of our data

  const KDTreeSingleIndexAdaptorParams index_params;

  Distance distance;

  typedef typename nanoflann::KDTreeBaseClassMT<nanoflann::KDTreeSingleIndexAdaptorMT<Distance, DatasetAdaptor, DIM, IndexType>, Distance, DatasetAdaptor, DIM, IndexType>
    BaseClassRef;

  typedef typename BaseClassRef::ElementType ElementType;
  typedef typename BaseClassRef::DistanceType DistanceType;

  typedef typename BaseClassRef::Node Node;
  typedef Node* NodePtr;

  typedef typename BaseClassRef::Interval Interval;
  /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */
  typedef typename BaseClassRef::BoundingBox BoundingBox;

  /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */
  typedef typename BaseClassRef::distance_vector_t distance_vector_t;

  /**
   * KDTree constructor
   *
   * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann
   *
   * The KD-Tree point dimension (the length of each point in the dataset, e.g. 3
   * for 3D points) is determined by means of:
   *  - The \a DIM template parameter if >0 (highest priority)
   *  - Otherwise, the \a dimensionality parameter of this constructor.
   *
   * @param inputData Dataset with the input features
   * @param params Basically, the maximum leaf node size
   */
  KDTreeSingleIndexAdaptorMT(const int dimensionality, const DatasetAdaptor& inputData, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams())
  : dataset(inputData),
    index_params(params),
    distance(inputData) {
    BaseClassRef::root_node = NULL;
    BaseClassRef::m_size = dataset.kdtree_get_point_count();
    BaseClassRef::m_size_at_index_build = BaseClassRef::m_size;
    BaseClassRef::dim = dimensionality;
    if (DIM > 0) BaseClassRef::dim = DIM;
    BaseClassRef::m_leaf_max_size = params.leaf_max_size;

    // Create a permutable array of indices to the input vectors.
    init_vind();
  }

  /**
   * Builds the index
   */
  void buildIndex(int num_threads = 4) {
    BaseClassRef::m_size = dataset.kdtree_get_point_count();
    BaseClassRef::m_size_at_index_build = BaseClassRef::m_size;
    init_vind();
    this->freeIndex(*this);
    BaseClassRef::m_size_at_index_build = BaseClassRef::m_size;
    if (BaseClassRef::m_size == 0) return;
    computeBoundingBox(BaseClassRef::root_bbox);

    BaseClassRef::pool.reserve(BaseClassRef::m_size);

#pragma omp parallel num_threads(num_threads)
    {
#pragma omp single nowait
      {
        BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox);  // construct the tree
      }
    }
  }

  /** \name Query methods
   * @{ */

  /**
   * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
   * inside the result object.
   *
   * Params:
   *  result = the result object in which the indices of the nearest-neighbors are stored
   *  vec = the vector for which to search the nearest neighbors
   *
   * \tparam RESULTSET Should be any ResultSet<DistanceType>
   * \return True if the requested neighbors could be found.
   * \sa knnSearch, radiusSearch
   */
  template <typename RESULTSET>
  bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const {
    assert(vec);
    if (this->size(*this) == 0) return false;
    if (!BaseClassRef::root_node) throw std::runtime_error("[nanoflann] findNeighbors() called before building the index.");
    float epsError = 1 + searchParams.eps;

    distance_vector_t dists;  // fixed or variable-sized container (depending on DIM)
    auto zero = static_cast<decltype(result.worstDist())>(0);
    assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), zero);  // Fill it with zeros.
    DistanceType distsq = this->computeInitialDistances(*this, vec, dists);
    searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError);  // "count_leaf" parameter removed since it was neither used nor returned to the user.
    return result.full();
  }

  /**
   * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1].
   * Their indices are stored inside the result object.
   * \sa radiusSearch, findNeighbors
   * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface.
   * \return Number `N` of valid points in the result set. Only the first `N` entries in
   *  `out_indices` and `out_distances_sq` will be valid. Return may be less than
   *  `num_closest` only if the number of elements in the tree is less than `num_closest`.
   */
  size_t knnSearch(const ElementType* query_point, const size_t num_closest, IndexType* out_indices, DistanceType* out_distances_sq, const int /* nChecks_IGNORED */ = 10) const {
    nanoflann::KNNResultSet<DistanceType, IndexType> resultSet(num_closest);
    resultSet.init(out_indices, out_distances_sq);
    this->findNeighbors(resultSet, query_point, nanoflann::SearchParams());
    return resultSet.size();
  }

  /**
   * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius.
   * The output is given as a vector of pairs, of which the first element is a
   * point index and the second the corresponding distance. Previous contents of
   * \a IndicesDists are cleared.
   *
   * If searchParams.sorted==true, the output list is sorted by ascending distances.
   *
   * For better performance, it is advisable to do a .reserve() on the vector
   * if you have any wild guess about the number of expected matches.
   *
   * \sa knnSearch, findNeighbors, radiusSearchCustomCallback
   * \return The number of points within the given radius (i.e. indices.size() or dists.size())
   */
  size_t radiusSearch(const ElementType* query_point, const DistanceType& radius, std::vector<std::pair<IndexType, DistanceType>>& IndicesDists, const SearchParams& searchParams)
    const {
    RadiusResultSet<DistanceType, IndexType> resultSet(radius, IndicesDists);
    const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams);
    if (searchParams.sorted) std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter());
    return nFound;
  }

  /**
   * Just like radiusSearch() but with a custom callback class for each point
   * found in the radius of the query. See the source of RadiusResultSet<> as a
   * starting point for your own classes.
   * \sa radiusSearch
   */
  template <class SEARCH_CALLBACK>
  size_t radiusSearchCustomCallback(const ElementType* query_point, SEARCH_CALLBACK& resultSet, const SearchParams& searchParams = SearchParams()) const {
    this->findNeighbors(resultSet, query_point, searchParams);
    return resultSet.size();
  }

  /** @} */

public:
  /** Make sure the auxiliary list \a vind has the same size as the current
   * dataset, and re-generate it if the size has changed. */
  void init_vind() {
    // Create a permutable array of indices to the input vectors.
    BaseClassRef::m_size = dataset.kdtree_get_point_count();
    if (BaseClassRef::vind.size() != BaseClassRef::m_size) BaseClassRef::vind.resize(BaseClassRef::m_size);
    for (size_t i = 0; i < BaseClassRef::m_size; i++) BaseClassRef::vind[i] = i;
  }

  void computeBoundingBox(BoundingBox& bbox) {
    resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim));
    if (dataset.kdtree_get_bbox(bbox)) {
      // Done! It was implemented in the derived class
    } else {
      const size_t N = dataset.kdtree_get_point_count();
      if (!N)
        throw std::runtime_error(
          "[nanoflann] computeBoundingBox() called but "
          "no data points found.");
      for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) {
        bbox[i].low = bbox[i].high = this->dataset_get(*this, 0, i);
      }
      for (size_t k = 1; k < N; ++k) {
        for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) {
          if (this->dataset_get(*this, k, i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, k, i);
          if (this->dataset_get(*this, k, i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, k, i);
        }
      }
    }
  }

  /**
   * Performs an exact search in the tree starting from a node.
   * \tparam RESULTSET Should be any ResultSet<DistanceType>
   * \return true if the search should be continued, false if the results are sufficient
   */
  template <class RESULTSET>
  bool searchLevel(RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, distance_vector_t& dists, const float epsError) const {
    /* If this is a leaf node, then do check and return. */
    if ((node->child1 == NULL) && (node->child2 == NULL)) {
      // count_leaf += (node->lr.right - node->lr.left);  // Removed since it was neither used nor returned to the user.
      DistanceType worst_dist = result_set.worstDist();
      for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) {
        const IndexType index = BaseClassRef::vind[i];  // reorder... : i;
        DistanceType dist = distance.evalMetric(vec, index, (DIM > 0 ? DIM : BaseClassRef::dim));
        if (dist < worst_dist) {
          if (!result_set.addPoint(dist, BaseClassRef::vind[i])) {
            // the resultset doesn't want to receive any more points, we're done searching!
            return false;
          }
        }
      }
      return true;
    }

    /* Which child branch should be taken first? */
    int idx = node->node_type.sub.divfeat;
    ElementType val = vec[idx];
    DistanceType diff1 = val - node->node_type.sub.divlow;
    DistanceType diff2 = val - node->node_type.sub.divhigh;

    NodePtr bestChild;
    NodePtr otherChild;
    DistanceType cut_dist;
    if ((diff1 + diff2) < 0) {
      bestChild = node->child1;
      otherChild = node->child2;
      cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx);
    } else {
      bestChild = node->child2;
      otherChild = node->child1;
      cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx);
    }

    /* Call recursively to search next level down. */
    if (!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) {
      // the resultset doesn't want to receive any more points, we're done searching!
      return false;
    }

    DistanceType dst = dists[idx];
    mindistsq = mindistsq + cut_dist - dst;
    dists[idx] = cut_dist;
    if (mindistsq * epsError <= result_set.worstDist()) {
      if (!searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError)) {
        // the resultset doesn't want to receive any more points, we're done searching!
        return false;
      }
    }
    dists[idx] = dst;
    return true;
  }
};

}  // namespace nanoflann

#endif /* NANOFLANN_MT_HPP_ */
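
The construction parallelism above is the standard OpenMP parallel/single/task idiom: one thread seeds a recursive task tree inside a parallel region, and the other threads steal the spawned tasks. A self-contained sketch of the same pattern on a toy problem (a recursive range sum, not the small_gicp API):

#include <cstdio>
#include <numeric>
#include <vector>

// Recursively sums v[first, last), spawning OpenMP tasks for large halves --
// the same shape as buildIndex()/divideTree() above (serial below a cutoff).
long sum_range(const std::vector<int>& v, size_t first, size_t last) {
  if (last - first <= 512) return std::accumulate(v.begin() + first, v.begin() + last, 0L);
  const size_t mid = first + (last - first) / 2;
  long left = 0, right = 0;
#pragma omp task shared(left)
  left = sum_range(v, first, mid);
#pragma omp task shared(right)
  right = sum_range(v, mid, last);
#pragma omp taskwait  // join both halves before combining, like divideTree()
  return left + right;
}

int main() {
  std::vector<int> v(1 << 20, 1);
  long total = 0;
#pragma omp parallel num_threads(4)
  {
#pragma omp single nowait  // one thread seeds the task tree; the rest steal tasks
    total = sum_range(v, 0, v.size());
  }
  std::printf("%ld\n", total);
  return 0;
}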

include/small_gicp/factors/gicp_factor.hpp

@@ -60,7 +60,7 @@ struct GICPFactor {
 
   *H = J.transpose() * mahalanobis * J;
   *b = J.transpose() * mahalanobis * residual;
-  *e = 0.5 * residual.dot(mahalanobis * residual);
+  *e = 0.5 * residual.transpose() * mahalanobis * residual;
 
   return true;
 }

@@ -78,7 +78,7 @@ struct GICPFactor {
 
   const Eigen::Vector4d transed_source_pt = T * traits::point(source, source_index);
   const Eigen::Vector4d residual = traits::point(target, target_index) - transed_source_pt;
-  return 0.5 * residual.dot(mahalanobis * residual);
+  return 0.5 * residual.transpose() * mahalanobis * residual;
 }
 
 /// @brief Returns true if this factor is not rejected as an outlier
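
Both forms compute the same scalar: for a vector r and matrix M, r.dot(M * r) equals r.transpose() * M * r (Eigen converts the 1x1 product to a scalar), so this hunk is a stylistic change, not a behavioral one. A quick standalone sanity check (illustrative values, not commit code):

#include <Eigen/Core>
#include <cassert>
#include <cmath>

int main() {
  const Eigen::Vector4d r(0.1, -0.2, 0.3, 0.0);
  Eigen::Matrix4d M = Eigen::Matrix4d::Identity();
  M(0, 0) = 2.0;  // any symmetric weight matrix works here
  const double a = 0.5 * r.dot(M * r);
  const double b = 0.5 * r.transpose() * M * r;  // implicit 1x1 -> scalar conversion
  assert(std::abs(a - b) < 1e-12);
  return 0;
}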

include/small_gicp/registration/reduction_tbb.hpp

@@ -123,7 +123,7 @@ struct ParallelReductionTBB {
     //
     LinearizeSum<TargetPointCloud, SourcePointCloud, TargetTree, CorrespondenceRejector, Factor> sum(target, source, target_tree, rejector, T, factors);
 
-    tbb::parallel_reduce(tbb::blocked_range<size_t>(0, factors.size()), sum);
+    tbb::parallel_reduce(tbb::blocked_range<size_t>(0, factors.size(), 8), sum);
 
     return {sum.H, sum.b, sum.e};
   }

@@ -131,7 +131,7 @@ struct ParallelReductionTBB {
   template <typename TargetPointCloud, typename SourcePointCloud, typename Factor>
   double error(const TargetPointCloud& target, const SourcePointCloud& source, const Eigen::Isometry3d& T, std::vector<Factor>& factors) {
     ErrorSum<TargetPointCloud, SourcePointCloud, Factor> sum(target, source, T, factors);
-    tbb::parallel_reduce(tbb::blocked_range<size_t>(0, factors.size()), sum);
+    tbb::parallel_reduce(tbb::blocked_range<size_t>(0, factors.size(), 16), sum);
     return sum.e;
   }
 };
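
The new third argument of tbb::blocked_range is the grain size: TBB stops subdividing a range once a subrange holds roughly that many items, which bounds scheduling overhead when per-item work is cheap. A minimal, self-contained illustration of the same parameter (a toy reduction, not the ParallelReductionTBB code):

#include <tbb/tbb.h>
#include <cstdio>
#include <functional>
#include <vector>

int main() {
  std::vector<double> xs(10000, 0.5);
  // Grain size 8: subranges of ~8 elements each, as in the linearize() hunk above.
  const double total = tbb::parallel_reduce(
    tbb::blocked_range<size_t>(0, xs.size(), 8),
    0.0,
    [&](const tbb::blocked_range<size_t>& r, double acc) {
      for (size_t i = r.begin(); i != r.end(); i++) acc += xs[i];
      return acc;
    },
    std::plus<double>());
  std::printf("%f\n", total);
  return 0;
}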

include/small_gicp/util/benchmark.hpp

@@ -32,28 +32,33 @@ public:
 
 struct Summarizer {
 public:
-  Summarizer(size_t max_size = std::numeric_limits<size_t>::max()) : max_size(max_size) {}
+  Summarizer() : num_data(0), sum(0.0), sq_sum(0.0), last_x(0.0) {}
 
-  void push(double x) { results.emplace_back(x); }
+  void push(double x) {
+    num_data++;
+    sum += x;
+    sq_sum += x * x;
+    last_x = x;
+  }
 
   std::pair<double, double> mean_std() const {
-    const double sum = std::accumulate(results.begin(), results.end(), 0.0);
-    const double sum_sq = std::accumulate(results.begin(), results.end(), 0.0, [](double sum, double x) { return sum + x * x; });
-
-    const double mean = sum / results.size();
-    const double var = (sum_sq - mean * sum) / results.size();
-
+    const double mean = sum / num_data;
+    const double var = (sq_sum - mean * sum) / num_data;
     return {mean, std::sqrt(var)};
   }
 
+  double last() const { return last_x; }
+
   std::string str() const {
     const auto [mean, std] = mean_std();
-    return fmt::format("{:.3f} +- {:.3f}", mean, std);
+    return fmt::format("{:.3f} +- {:.3f} (last={:.3f})", mean, std, last_x);
   }
 
 private:
-  size_t max_size;
-  std::deque<double> results;
+  size_t num_data;
+  double sum;
+  double sq_sum;
+  double last_x;
 };
 
 template <typename Container, typename Transform>
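
The rewritten Summarizer keeps O(1) running state (count, sum, sum of squares) instead of storing every sample, relying on the identity Var(x) = E[x^2] - mean^2, i.e. (sq_sum - mean * sum) / n. A standalone check of that algebra against the direct two-pass variance (illustrative values, not commit code):

#include <cassert>
#include <cmath>
#include <cstddef>

int main() {
  const double xs[] = {1.0, 2.0, 4.0};
  double sum = 0.0, sq_sum = 0.0;
  size_t n = 0;
  for (double x : xs) { n++; sum += x; sq_sum += x * x; }

  const double mean = sum / n;                   // running-sum estimate
  const double var = (sq_sum - mean * sum) / n;  // E[x^2] - mean^2

  double direct = 0.0;  // direct two-pass variance for comparison
  for (double x : xs) direct += (x - mean) * (x - mean);
  direct /= n;

  assert(std::abs(var - direct) < 1e-12);  // both give 14/9 here
  return 0;
}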

@@ -87,16 +92,27 @@ public:
   }
 
   template <typename PointCloud>
-  std::vector<std::shared_ptr<PointCloud>> convert() const {
+  std::vector<std::shared_ptr<PointCloud>> convert(bool release = false) {
     std::vector<std::shared_ptr<PointCloud>> converted(points.size());
-    std::ranges::transform(points, converted.begin(), [](const auto& raw_points) {
+    std::ranges::transform(points, converted.begin(), [=](auto& raw_points) {
       auto points = std::make_shared<PointCloud>();
       traits::resize(*points, raw_points.size());
       for (size_t i = 0; i < raw_points.size(); i++) {
         traits::set_point(*points, i, raw_points[i].template cast<double>());
       }
+
+      if (release) {
+        raw_points.clear();
+        raw_points.shrink_to_fit();
+      }
       return points;
     });
 
+    if (release) {
+      points.clear();
+      points.shrink_to_fit();
+    }
+
     return converted;
   }
 

include/small_gicp/util/downsampling.hpp

@@ -9,7 +9,8 @@
 
 namespace small_gicp {
 
-/// @brief Voxelgrid downsampling
+/// @brief Voxelgrid downsampling.
+/// @note Discretized voxel coords must be in 21bit range [-1048576, 1048575].
 /// @param points Input points
 /// @param leaf_size Downsampling resolution
 /// @return Downsampled points

@@ -28,7 +29,7 @@ std::shared_ptr<OutputPointCloud> voxelgrid_sampling(const InputPointCloud& poin
 
   std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size());
   for (size_t i = 0; i < traits::size(points); i++) {
-    // TODO: Check if coord in 21bit range
+    // TODO: Check if coord is within 21bit range
     const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset;
 
     // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit)
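
The 21-bit range in the note comes from the packing scheme: each discretized voxel coordinate is shifted by +2^20 to make it non-negative, masked to 21 bits, and the three coordinates are packed into one 64-bit key (3 x 21 = 63 bits, top bit unused). A small worked example of the same arithmetic (standalone, not commit code):

#include <cstdint>
#include <cstdio>

int main() {
  const int coord_bit_size = 21;
  const std::uint64_t coord_bit_mask = (1ull << 21) - 1;  // 0x1FFFFF
  const int coord_offset = 1 << (coord_bit_size - 1);     // 2^20 = 1048576

  // A voxel at discretized coords (x, y, z) = (-3, 0, 5):
  const std::uint64_t x = (std::uint64_t)(-3 + coord_offset) & coord_bit_mask;
  const std::uint64_t y = (std::uint64_t)(0 + coord_offset) & coord_bit_mask;
  const std::uint64_t z = (std::uint64_t)(5 + coord_offset) & coord_bit_mask;

  // (0|1bit, z|21bit, y|21bit, x|21bit)
  const std::uint64_t bits = x | (y << 21) | (z << 42);
  std::printf("%016llx\n", (unsigned long long)bits);
  // Coordinates outside [-1048576, 1048575] would wrap and collide -- hence the TODO.
  return 0;
}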

include/small_gicp/util/downsampling_omp.hpp (new file)

@@ -0,0 +1,87 @@
#pragma once

#include <atomic>
#include <memory>
#include <fstream>

#include <small_gicp/points/traits.hpp>
#include <small_gicp/util/fast_floor.hpp>
#include <small_gicp/util/vector3i_hash.hpp>
#include <small_gicp/util/sort_omp.hpp>

namespace small_gicp {

/// @brief Voxel grid downsampling with OpenMP backend.
/// @note Discretized voxel coords must be in 21bit range [-1048576, 1048575].
/// @param points Input points
/// @param leaf_size Downsampling resolution
/// @return Downsampled points
template <typename InputPointCloud, typename OutputPointCloud = InputPointCloud>
std::shared_ptr<OutputPointCloud> voxelgrid_sampling_omp(const InputPointCloud& points, double leaf_size, int num_threads = 4) {
  if (traits::size(points) == 0) {
    std::cerr << "warning: empty input points!!" << std::endl;
    return std::make_shared<OutputPointCloud>();
  }

  const double inv_leaf_size = 1.0 / leaf_size;
  const int coord_bit_size = 21;                       // Bits to represent each voxel coordinate (pack 21x3 = 63bits in 64bit int)
  const size_t coord_bit_mask = (1 << 21) - 1;         // Bit mask
  const int coord_offset = 1 << (coord_bit_size - 1);  // Coordinate offset to make values positive

  std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size());
#pragma omp parallel for num_threads(num_threads) schedule(guided, 32)
  for (size_t i = 0; i < traits::size(points); i++) {
    // TODO: Check if coord is within 21bit range
    const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset;

    // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit)
    const std::uint64_t bits =                                 //
      ((coord[0] & coord_bit_mask) << (coord_bit_size * 0)) |  //
      ((coord[1] & coord_bit_mask) << (coord_bit_size * 1)) |  //
      ((coord[2] & coord_bit_mask) << (coord_bit_size * 2));
    coord_pt[i] = {bits, i};
  }

  // Sort by voxel coords
  quick_sort_omp(
    coord_pt.begin(),
    coord_pt.end(),
    [](const auto& lhs, const auto& rhs) { return lhs.first < rhs.first; },
    num_threads);

  auto downsampled = std::make_shared<OutputPointCloud>();
  traits::resize(*downsampled, traits::size(points));

  // Take block-wise sum
  const int block_size = 1024;
  std::atomic_uint64_t num_points = 0;

#pragma omp parallel for num_threads(num_threads) schedule(guided, 4)
  for (size_t block_begin = 0; block_begin < traits::size(points); block_begin += block_size) {
    std::vector<Eigen::Vector4d> sub_points;
    sub_points.reserve(block_size);

    const size_t block_end = std::min<size_t>(traits::size(points), block_begin + block_size);

    Eigen::Vector4d sum_pt = traits::point(points, coord_pt[block_begin].second);
    for (size_t i = block_begin + 1; i != block_end; i++) {
      if (coord_pt[i - 1].first != coord_pt[i].first) {
        sub_points.emplace_back(sum_pt / sum_pt.w());
        sum_pt.setZero();
      }
      sum_pt += traits::point(points, coord_pt[i].second);
    }
    sub_points.emplace_back(sum_pt / sum_pt.w());

    const size_t point_index_begin = num_points.fetch_add(sub_points.size());
    for (size_t i = 0; i < sub_points.size(); i++) {
      traits::set_point(*downsampled, point_index_begin + i, sub_points[i]);
    }
  }

  traits::resize(*downsampled, num_points);

  return downsampled;
}

}  // namespace small_gicp
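
A minimal usage sketch for the new OpenMP backend (assuming small_gicp's PointCloud type, as in the other examples; not part of the diff):

#include <memory>
#include <small_gicp/points/point_cloud.hpp>
#include <small_gicp/util/downsampling_omp.hpp>

std::shared_ptr<small_gicp::PointCloud> downsample(const small_gicp::PointCloud& raw) {
  // 0.25 m voxels, 4 OpenMP threads; averages the points falling in each voxel.
  return small_gicp::voxelgrid_sampling_omp(raw, 0.25, 4);
}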

include/small_gicp/util/downsampling_tbb.hpp

@@ -2,6 +2,7 @@
 
 #include <atomic>
 #include <memory>
+#include <fstream>
 
 #include <tbb/tbb.h>
 #include <small_gicp/points/traits.hpp>

@@ -10,7 +11,8 @@
 
 namespace small_gicp {
 
-/// @brief Voxel grid downsampling using TBB.
+/// @brief Voxel grid downsampling with TBB backend.
+/// @note Discretized voxel coords must be in 21bit range [-1048576, 1048575].
 /// @param points Input points
 /// @param leaf_size Downsampling resolution
 /// @return Downsampled points

@@ -27,16 +29,18 @@ std::shared_ptr<OutputPointCloud> voxelgrid_sampling_tbb(const InputPointCloud&
   const int coord_offset = 1 << (coord_bit_size - 1);  // Coordinate offset to make values positive
 
   std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size());
-  tbb::parallel_for(size_t(0), size_t(traits::size(points)), [&](size_t i) {
-    // TODO: Check if coord in 21bit range
-    const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset;
+  tbb::parallel_for(tbb::blocked_range<size_t>(0, traits::size(points), 64), [&](const tbb::blocked_range<size_t>& range) {
+    for (size_t i = range.begin(); i != range.end(); i++) {
+      // TODO: Check if coord is within 21bit range
+      const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset;
 
-    // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit)
-    const std::uint64_t bits =                                 //
-      ((coord[0] & coord_bit_mask) << (coord_bit_size * 0)) |  //
-      ((coord[1] & coord_bit_mask) << (coord_bit_size * 1)) |  //
-      ((coord[2] & coord_bit_mask) << (coord_bit_size * 2));
-    coord_pt[i] = {bits, i};
+      // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit)
+      const std::uint64_t bits =                                 //
+        ((coord[0] & coord_bit_mask) << (coord_bit_size * 0)) |  //
+        ((coord[1] & coord_bit_mask) << (coord_bit_size * 1)) |  //
+        ((coord[2] & coord_bit_mask) << (coord_bit_size * 2));
+      coord_pt[i] = {bits, i};
+    }
   });
 
   // Sort by voxel coords

include/small_gicp/util/sort_omp.hpp (new file)

@@ -0,0 +1,73 @@
#pragma once

#include <algorithm>

namespace small_gicp {

template <typename RandomAccessIterator, typename Compare>
void merge_sort_omp_impl(RandomAccessIterator first, RandomAccessIterator last, const Compare& comp) {
  const size_t n = std::distance(first, last);
  if (n < 1024) {
    std::sort(first, last, comp);
    return;
  }

  auto center = first + n / 2;

#pragma omp task
  merge_sort_omp_impl(first, center, comp);

#pragma omp task
  merge_sort_omp_impl(center, last, comp);

#pragma omp taskwait
  std::inplace_merge(first, center, last, comp);
}

template <typename RandomAccessIterator, typename Compare>
void merge_sort_omp(RandomAccessIterator first, RandomAccessIterator last, const Compare& comp, int num_threads) {
#pragma omp parallel num_threads(num_threads)
  {
#pragma omp single nowait
    { merge_sort_omp_impl(first, last, comp); }
  }
}

template <typename RandomAccessIterator, typename Compare>
void quick_sort_omp_impl(RandomAccessIterator first, RandomAccessIterator last, const Compare& comp) {
  const std::ptrdiff_t n = std::distance(first, last);
  if (n < 1024) {
    std::sort(first, last, comp);
    return;
  }

  const auto median3 = [&](const auto& a, const auto& b, const auto& c, const Compare& comp) {
    return comp(a, b) ? (comp(b, c) ? b : (comp(a, c) ? c : a)) : (comp(a, c) ? a : (comp(b, c) ? c : b));
  };

  const int offset = n / 8;
  const auto m1 = median3(*first, *(first + offset), *(first + offset * 2), comp);
  const auto m2 = median3(*(first + offset * 3), *(first + offset * 4), *(first + offset * 5), comp);
  const auto m3 = median3(*(first + offset * 6), *(first + offset * 7), *(last - 1), comp);

  auto pivot = median3(m1, m2, m3, comp);
  auto middle1 = std::partition(first, last, [&](const auto& val) { return comp(val, pivot); });
  auto middle2 = std::partition(middle1, last, [&](const auto& val) { return !comp(pivot, val); });

#pragma omp task
  quick_sort_omp_impl(first, middle1, comp);

#pragma omp task
  quick_sort_omp_impl(middle2, last, comp);
}

template <typename RandomAccessIterator, typename Compare>
void quick_sort_omp(RandomAccessIterator first, RandomAccessIterator last, const Compare& comp, int num_threads) {
#pragma omp parallel num_threads(num_threads)
  {
#pragma omp single nowait
    { quick_sort_omp_impl(first, last, comp); }
  }
}

}  // namespace small_gicp
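
A minimal usage example for the new task-parallel sorts (standalone, not part of the diff):

#include <algorithm>
#include <cstdio>
#include <functional>
#include <random>
#include <vector>
#include <small_gicp/util/sort_omp.hpp>

int main() {
  std::mt19937 rng(42);
  std::vector<int> v(1 << 20);
  for (auto& x : v) x = (int)(rng() % 1000000);

  // Task-parallel quicksort with 4 threads; the comparator works as in std::sort.
  small_gicp::quick_sort_omp(v.begin(), v.end(), std::less<int>(), 4);
  std::printf("sorted: %d\n", (int)std::is_sorted(v.begin(), v.end()));
  return 0;
}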

src/downsampling_benchmark.cpp

@@ -7,6 +7,7 @@
 
 #include <small_gicp/util/benchmark.hpp>
 #include <small_gicp/util/downsampling.hpp>
+#include <small_gicp/util/downsampling_omp.hpp>
 #include <small_gicp/util/downsampling_tbb.hpp>
 #include <small_gicp/points/point_cloud.hpp>
 #include <small_gicp/points/pcl_point.hpp>

@@ -48,28 +49,50 @@ auto downsample_pcl(const PointCloudPtr& points, double leaf_size) {
 int main(int argc, char** argv) {
   using namespace small_gicp;
 
-  std::cout << "SIMD in use: " << Eigen::SimdInstructionSetsInUse() << std::endl;
-  const std::string dataset_path = "/home/koide/datasets/velodyne";
+  if (argc < 2) {
+    std::cout << "usage: downsampling_benchmark <dataset_path> (--num_threads 4) (--max_num_frames 1000)" << std::endl;
+    return 0;
+  }
 
-  std::cout << "Load dataset from " << dataset_path << std::endl;
-  KittiDataset kitti(dataset_path, 1000);
+  const std::string dataset_path = argv[1];
+
+  int num_threads = 4;
+  size_t max_num_frames = 1000;
+
+  for (auto arg = argv + 2; arg != argv + argc; arg++) {
+    if (std::string(*arg) == "--num_threads") {
+      num_threads = std::stoi(*(arg + 1));
+    } else if (std::string(*arg) == "--max_num_frames") {
+      max_num_frames = std::stoul(*(arg + 1));
+    }
+  }
+
+  std::cout << "dataset_path=" << dataset_path << std::endl;
+  std::cout << "max_num_frames=" << max_num_frames << std::endl;
+  std::cout << "num_threads=" << num_threads << std::endl;
+
+  tbb::global_control control(tbb::global_control::max_allowed_parallelism, num_threads);
+
+  KittiDataset kitti(dataset_path, max_num_frames);
   std::cout << "num_frames=" << kitti.points.size() << std::endl;
   std::cout << fmt::format("num_points={} [points]", summarize(kitti.points, [](const auto& pts) { return pts.size(); })) << std::endl;
 
-  const auto points_pcl = kitti.convert<pcl::PointCloud<pcl::PointXYZ>>();
+  const auto points_pcl = kitti.convert<pcl::PointCloud<pcl::PointXYZ>>(true);
 
   std::cout << "---" << std::endl;
-  std::cout << "leaf_size=0.1(warmup)" << std::endl;
+  std::cout << "leaf_size=0.5(warmup)" << std::endl;
   std::cout << fmt::format("{:25}: ", "pcl_voxelgrid") << std::flush;
-  benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::VoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
+  benchmark(points_pcl, 0.5, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::VoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
   std::cout << fmt::format("{:25}: ", "pcl_approx_voxelgrid") << std::flush;
-  benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::ApproximateVoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
+  benchmark(points_pcl, 0.5, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::ApproximateVoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
   std::cout << fmt::format("{:25}: ", "small_voxelgrid") << std::flush;
-  benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); });
+  benchmark(points_pcl, 0.5, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); });
   std::cout << fmt::format("{:25}: ", "small_voxelgrid_tbb") << std::flush;
-  benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); });
+  benchmark(points_pcl, 0.5, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); });
+  std::cout << fmt::format("{:25}: ", "small_voxelgrid_omp") << std::flush;
+  benchmark(points_pcl, 0.5, [=](const auto& points, double leaf_size) { return voxelgrid_sampling_omp(*points, leaf_size, num_threads); });
 
-  for (double leaf_size = 1.0; leaf_size <= 1.51; leaf_size += 0.1) {
+  for (double leaf_size = 0.1; leaf_size <= 1.51; leaf_size += 0.1) {
     std::cout << "---" << std::endl;
     std::cout << "leaf_size=" << leaf_size << std::endl;
     std::cout << fmt::format("{:25}: ", "pcl_voxelgrid") << std::flush;

@@ -80,6 +103,8 @@ int main(int argc, char** argv) {
     benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); });
     std::cout << fmt::format("{:25}: ", "small_voxelgrid_tbb") << std::flush;
     benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); });
+    std::cout << fmt::format("{:25}: ", "small_voxelgrid_omp") << std::flush;
+    benchmark(points_pcl, leaf_size, [=](const auto& points, double leaf_size) { return voxelgrid_sampling_omp(*points, leaf_size, num_threads); });
   }
 
   return 0;

src/odometry_benchmark.cpp

@@ -2,6 +2,7 @@
 #include <pcl/point_cloud.h>
 
 #include <small_gicp/ann/kdtree.hpp>
+#include <small_gicp/ann/kdtree_mt.hpp>
 #include <small_gicp/util/downsampling.hpp>
 #include <small_gicp/util/downsampling_tbb.hpp>
 #include <small_gicp/util/normal_estimation.hpp>

@@ -15,6 +16,7 @@
 #include <small_gicp/factors/icp_factor.hpp>
 #include <small_gicp/factors/plane_icp_factor.hpp>
 #include <small_gicp/factors/gicp_factor.hpp>
+#include <small_gicp/registration/reduction_omp.hpp>
 #include <small_gicp/registration/reduction_tbb.hpp>
 #include <small_gicp/registration/registration.hpp>
 
@ -25,89 +27,176 @@
|
|||
#include <glk/pointcloud_buffer_pcl.hpp>
|
||||
#include <guik/viewer/light_viewer.hpp>
|
||||
|
||||
template <typename PointT>
|
||||
pcl::PointCloud<pcl::PointXYZ>::Ptr convert_pcl(const pcl::PointCloud<PointT>& points) {
|
||||
auto converted = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
|
||||
converted->is_dense = false;
|
||||
converted->width = points.size();
|
||||
converted->height = 1;
|
||||
namespace small_gicp {
|
||||
|
||||
converted->resize(points.size());
|
||||
for (size_t i = 0; i < points.size(); i++) {
|
||||
converted->at(i).getVector4fMap() = points.at(i).getVector4fMap();
|
||||
class OdometryEstimater {
|
||||
public:
|
||||
OdometryEstimater() = default;
|
||||
virtual ~OdometryEstimater() = default;
|
||||
|
||||
virtual Eigen::Isometry3d estimate(const PointCloud::Ptr& points) = 0;
|
||||
|
||||
const Summarizer& registration_times() const { return reg_times; }
|
||||
|
||||
protected:
|
||||
Summarizer reg_times;
|
||||
};
|
||||
|
||||
class FastGICPOdometryEstimater : public OdometryEstimater {
|
||||
public:
|
||||
FastGICPOdometryEstimater(int num_threads) : T(Eigen::Isometry3d::Identity()) {
|
||||
gicp.setCorrespondenceRandomness(10);
|
||||
gicp.setMaxCorrespondenceDistance(1.0);
|
||||
gicp.setNumThreads(num_threads);
|
||||
}
|
||||
return converted;
|
||||
}
|
||||
|
||||
Eigen::Isometry3d estimate(const PointCloud::Ptr& points) override {
|
||||
auto points_pcl = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
|
||||
points_pcl->resize(points->size());
|
||||
for (size_t i = 0; i < points->size(); i++) {
|
||||
points_pcl->at(i).getVector4fMap() = points->point(i).template cast<float>();
|
||||
}
|
||||
|
||||
Stopwatch sw;
|
||||
sw.start();
|
||||
|
||||
if (!gicp.getInputTarget()) {
|
||||
gicp.setInputTarget(points_pcl);
|
||||
return Eigen::Isometry3d::Identity();
|
||||
}
|
||||
|
||||
gicp.setInputSource(points_pcl);
|
||||
pcl::PointCloud<pcl::PointXYZ> aligned;
|
||||
gicp.align(aligned);
|
||||
|
||||
sw.stop();
|
||||
reg_times.push(sw.msec());
|
||||
|
||||
T = T * Eigen::Isometry3d(gicp.getFinalTransformation().cast<double>());
|
||||
gicp.swapSourceAndTarget();
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
private:
|
||||
fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ> gicp;
|
||||
Eigen::Isometry3d T;
|
||||
};
|
||||
|
||||
class SmallGICPOdometryEstimater : public OdometryEstimater {
|
||||
public:
|
||||
SmallGICPOdometryEstimater(int num_threads) : num_threads(num_threads), T(Eigen::Isometry3d::Identity()) {}
|
||||
|
||||
Eigen::Isometry3d estimate(const PointCloud::Ptr& points) override {
|
||||
Stopwatch sw;
|
||||
sw.start();
|
||||
|
||||
auto tree = std::make_shared<KdTreeMT<PointCloud>>(points);
|
||||
estimate_covariances_tbb(*points, *tree, 10);
|
||||
|
||||
if (!target_points) {
|
||||
target_points = points;
|
||||
target_tree = tree;
|
||||
return Eigen::Isometry3d::Identity();
|
||||
}
|
||||
|
||||
Registration<GICPFactor, DistanceRejector, ParallelReductionTBB, LevenbergMarquardtOptimizer> registration;
|
||||
const auto result = registration.align(*target_points, *points, *target_tree, Eigen::Isometry3d::Identity());
|
||||
|
||||
sw.stop();
|
||||
reg_times.push(sw.msec());
|
||||
|
||||
T = T * result.T_target_source;
|
||||
target_points = points;
|
||||
target_tree = tree;
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
private:
|
||||
int num_threads;
|
||||
PointCloud::ConstPtr target_points;
|
||||
KdTreeMT<PointCloud>::Ptr target_tree;
|
||||
|
||||
Eigen::Isometry3d T;
|
||||
};
|
||||
|
||||
} // namespace small_gicp
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
using namespace small_gicp;
|
||||
|
||||
std::cout << "SIMD in use: " << Eigen::SimdInstructionSetsInUse() << std::endl;
|
||||
const std::string dataset_path = "/home/koide/datasets/velodyne";
|
||||
if (argc < 3) {
|
||||
std::cout << "usage: odometry_benchmark <dataset_path> <output_path> (--engine small|fast) (--num_threads 4) (--resolution 0.25)" << std::endl;
|
||||
return 0;
|
||||
}

  std::cout << "Load dataset from " << dataset_path << std::endl;
  KittiDataset kitti(dataset_path, 1000);
  const std::string dataset_path = argv[1];
  const std::string output_path = argv[2];

  int num_threads = 4;
  double downsampling_resolution = 0.25;
  std::string engine = "small";

  // Minimal argument parsing: each option consumes the following token.
  for (auto arg = argv + 3; arg != argv + argc; arg++) {
    if (std::string(*arg) == "--num_threads") {
      num_threads = std::stoi(*(arg + 1));
    } else if (std::string(*arg) == "--resolution") {
      downsampling_resolution = std::stod(*(arg + 1));
    } else if (std::string(*arg) == "--engine") {
      engine = *(arg + 1);
    }
  }

  std::cout << "dataset_path=" << dataset_path << std::endl;
  std::cout << "output_path=" << output_path << std::endl;
  std::cout << "registration_engine=" << engine << std::endl;
  std::cout << "num_threads=" << num_threads << std::endl;
  std::cout << "downsampling_resolution=" << downsampling_resolution << std::endl;

  tbb::global_control control(tbb::global_control::max_allowed_parallelism, num_threads);
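  // global_control caps the number of TBB worker threads process-wide, so the
  // TBB-based covariance estimation and reduction respect --num_threads.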

  std::shared_ptr<OdometryEstimater> odom;
  if (engine == "small") {
    odom = std::make_shared<SmallGICPOdometryEstimater>(num_threads);
  } else if (engine == "fast") {
    odom = std::make_shared<FastGICPOdometryEstimater>(num_threads);
  } else {
    std::cerr << "Unknown engine: " << engine << std::endl;
    return 1;
  }

  KittiDataset kitti(dataset_path);
  std::cout << "num_frames=" << kitti.points.size() << std::endl;
  std::cout << fmt::format("num_points={} [points]", summarize(kitti.points, [](const auto& pts) { return pts.size(); })) << std::endl;

  auto raw_points = kitti.convert<pcl::PointCloud<pcl::PointCovariance>>();
  kitti.points.clear();
  auto raw_points = kitti.convert<PointCloud>(true);

  Eigen::Isometry3d T = Eigen::Isometry3d::Identity();
  auto target = voxelgrid_sampling(*raw_points.front(), 0.25);
  estimate_covariances_tbb(*target, 10);
  auto target_tree = std::make_shared<KdTree<pcl::PointCloud<pcl::PointCovariance>>>(target);
  std::vector<Eigen::Isometry3d> traj;
  for (size_t i = 0; i < raw_points.size(); i++) {
    auto downsampled = voxelgrid_sampling(*raw_points[i], downsampling_resolution);
    const Eigen::Isometry3d T = odom->estimate(downsampled);

    auto viewer = guik::viewer();
    viewer->disable_vsync();
    if (i && i % 256 == 0) {
      std::cout << fmt::format("{}/{} : {}", i, raw_points.size(), odom->registration_times().str()) << std::endl;
    }
    traj.emplace_back(T);
  }

  Stopwatch sw;
  Summarizer gicp_times(10);
  Summarizer registration_times(10);
  std::cout << "registration_time_stats=" << odom->registration_times().str() << std::endl;

  fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ> gicp;
  gicp.setCorrespondenceRandomness(10);
  gicp.setMaxCorrespondenceDistance(1.0);
  gicp.setInputTarget(convert_pcl(*target));
  gicp.setNumThreads(6);
  std::ofstream ofs(output_path);
  for (const auto& T : traj) {
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 4; j++) {
        if (i || j) {
          ofs << " ";
        }

  for (size_t i = 0; i < raw_points.size() && viewer->spin_once(); i++) {
    auto points = voxelgrid_sampling(*raw_points[i], 0.25);
    auto source_pts = convert_pcl(*points);

    /*
    sw.start();
    gicp.setInputSource(source_pts);
    pcl::PointCloud<pcl::PointXYZ> aligned;
    gicp.align(aligned);
    const Eigen::Matrix4f T_ = gicp.getFinalTransformation();
    T = T * Eigen::Isometry3d(T_.cast<double>());
    sw.stop();

    gicp.swapSourceAndTarget();
    gicp_times.push(sw.msec());
    //*/

    //*
    sw.start();
    auto tree = std::make_shared<KdTree<pcl::PointCloud<pcl::PointCovariance>>>(points);
    estimate_covariances_tbb(*points, *tree, 10);
    Registration<GICPFactor, DistanceRejector, ParallelReductionTBB, LevenbergMarquardtOptimizer> registration;
    auto result = registration.align(*target, *points, *target_tree, Eigen::Isometry3d::Identity());
    sw.stop();
    registration_times.push(sw.msec());

    T = T * result.T_target_source;

    target = points;
    target_tree = tree;
    //*/

    std::cout << "gicp=" << gicp_times.str() << "[msec] registration=" << registration_times.str() << "[msec]" << std::endl;

    auto cloud_buffer = glk::create_point_cloud_buffer(*points);
    viewer->update_drawable("current", cloud_buffer, guik::FlatOrange(T).set_point_scale(2.0f));
    viewer->update_drawable(guik::anon(), cloud_buffer, guik::Rainbow(T));
        ofs << fmt::format("{:.6f}", T(i, j));
      }
    }
    ofs << std::endl;
  }
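
  // The saved trajectory follows the KITTI odometry convention: each line
  // holds the first three rows of the 4x4 pose matrix as 12 space-separated
  // values.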

  return 0;
}

@ -0,0 +1,113 @@
#include <random>
#include <vector>
#include <string>
#include <utility>
#include <sstream>
#include <fstream>
#include <iostream>
#include <filesystem>
#include <algorithm>
#include <cstdint>
#include <cstdlib>

#include <tbb/tbb.h>
#include <fmt/format.h>
#include <small_gicp/util/benchmark.hpp>
#include <small_gicp/util/sort_omp.hpp>

int main(int argc, char** argv) {
  std::vector<std::vector<std::pair<std::uint64_t, size_t>>> dataset(1000);
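  // Each /tmp/coord_pt_XXXXXX.txt is assumed to hold one (voxel coordinate,
  // point index) pair per line, presumably dumped by the downsampling code.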
  std::cout << "read dataset" << std::endl;

#pragma omp parallel for
  for (int i = 0; i < static_cast<int>(dataset.size()); i++) {
    std::ifstream ifs(fmt::format("/tmp/coord_pt_{:06d}.txt", i));
    if (!ifs) {
      std::cerr << "failed to open " << i << std::endl;
      abort();
    }

    std::vector<std::pair<std::uint64_t, size_t>> coord_pt;
    coord_pt.reserve(200000);

    std::string line;
    while (!ifs.eof() && std::getline(ifs, line) && !line.empty()) {
      std::stringstream sst(line);
      std::uint64_t coord;
      size_t pt;
      sst >> coord >> pt;
      coord_pt.emplace_back(coord, pt);
    }

    dataset[i] = std::move(coord_pt);
  }

  using namespace small_gicp;

  Stopwatch sw;
  const int num_threads = 8;
  tbb::global_control control(tbb::global_control::max_allowed_parallelism, num_threads);
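  // Limiting TBB to num_threads keeps tbb::parallel_sort on the same thread
  // budget as quick_sort_omp (which takes num_threads explicitly), so the
  // timings below compare like with like.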

  std::vector<std::vector<std::pair<std::uint64_t, size_t>>> omp_sorted_dataset = dataset;
  Summarizer omp_times;

  sw.start();
  for (auto& coord_pt : omp_sorted_dataset) {
    quick_sort_omp(
      coord_pt.begin(),
      coord_pt.end(),
      [](const auto& a, const auto& b) { return a.first < b.first; },
      num_threads);
    sw.lap();
    omp_times.push(sw.msec());
  }

  std::cout << "omp=" << omp_times.str() << std::endl;

  std::vector<std::vector<std::pair<std::uint64_t, size_t>>> std_sorted_dataset = dataset;
  Summarizer std_times;

  sw.start();
  for (auto& coord_pt : std_sorted_dataset) {
    std::ranges::sort(coord_pt, [](const auto& a, const auto& b) { return a.first < b.first; });
    sw.lap();
    std_times.push(sw.msec());
  }

  std::cout << "std=" << std_times.str() << std::endl;

  std::vector<std::vector<std::pair<std::uint64_t, size_t>>> stable_sorted_dataset = dataset;
  Summarizer stable_times;

  sw.start();
  for (auto& coord_pt : stable_sorted_dataset) {
    std::ranges::stable_sort(coord_pt, [](const auto& a, const auto& b) { return a.first < b.first; });
    sw.lap();
    stable_times.push(sw.msec());
  }

  std::cout << "stable=" << stable_times.str() << std::endl;

  std::vector<std::vector<std::pair<std::uint64_t, size_t>>> tbb_sorted_dataset = dataset;
  Summarizer tbb_times;

  sw.start();
  for (auto& coord_pt : tbb_sorted_dataset) {
    tbb::parallel_sort(coord_pt, [](const auto& a, const auto& b) { return a.first < b.first; });
    sw.lap();
    tbb_times.push(sw.msec());
  }

  std::cout << "tbb=" << tbb_times.str() << std::endl;

  std::cout << "validate" << std::endl;
  for (size_t i = 0; i < std_sorted_dataset.size(); i++) {
    for (size_t j = 0; j < std_sorted_dataset[i].size(); j++) {
      if (std_sorted_dataset[i][j].first != tbb_sorted_dataset[i][j].first) {
        std::cerr << "error: " << i << " " << j << std::endl;
        abort();
      }
      if (std_sorted_dataset[i][j].first != omp_sorted_dataset[i][j].first) {
        std::cerr << "error: " << i << " " << j << std::endl;
        abort();
      }
      if (std_sorted_dataset[i][j].first != stable_sorted_dataset[i][j].first) {
        std::cerr << "error: " << i << " " << j << std::endl;
        abort();
      }
    }
  }
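
  // Only the keys (.first) are compared: the unstable sorts may legitimately
  // permute the payloads of equal keys, so payload order is not checked.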

  return 0;
}