fast_floor

This commit is contained in:
k.koide 2024-03-24 17:44:20 +09:00
parent dbc3efe0ae
commit 2b101cda7a
10 changed files with 149 additions and 51 deletions

View File

@ -18,6 +18,7 @@ if(BUILD_WITH_MARCH_NATIVE)
set(CMAKE_CXX_FLAGS "-march=native ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "-march=native ${CMAKE_CXX_FLAGS}")
endif() endif()
find_package(fmt REQUIRED)
find_package(PCL REQUIRED) find_package(PCL REQUIRED)
find_package(TBB REQUIRED) find_package(TBB REQUIRED)
find_package(Eigen3 REQUIRED) find_package(Eigen3 REQUIRED)
@ -49,6 +50,23 @@ endif()
# ${Iridescence_LIBRARIES} # ${Iridescence_LIBRARIES}
# ) # )
add_executable(downsampling_test
src/downsampling_test.cpp
)
target_include_directories(downsampling_test PUBLIC
include
${PCL_INCLUDE_DIRS}
${TBB_INCLUDE_DIRS}
${EIGEN3_INCLUDE_DIR}
${Iridescence_INCLUDE_DIRS}
)
target_link_libraries(downsampling_test
fmt::fmt
${PCL_LIBRARIES}
${TBB_LIBRARIES}
${Iridescence_LIBRARIES}
)
add_executable(downsampling_benchmark add_executable(downsampling_benchmark
src/downsampling_benchmark.cpp src/downsampling_benchmark.cpp
) )
@ -57,10 +75,9 @@ target_include_directories(downsampling_benchmark PUBLIC
${PCL_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS}
${TBB_INCLUDE_DIRS} ${TBB_INCLUDE_DIRS}
${EIGEN3_INCLUDE_DIR} ${EIGEN3_INCLUDE_DIR}
${Iridescence_INCLUDE_DIRS}
) )
target_link_libraries(downsampling_benchmark target_link_libraries(downsampling_benchmark
fmt::fmt
${PCL_LIBRARIES} ${PCL_LIBRARIES}
${TBB_LIBRARIES} ${TBB_LIBRARIES}
${Iridescence_LIBRARIES}
) )

View File

@ -3,6 +3,7 @@
#include <atomic> #include <atomic>
#include <tbb/tbb.h> #include <tbb/tbb.h>
#include <small_gicp/ann/kdtree.hpp> #include <small_gicp/ann/kdtree.hpp>
#include <small_gicp/util/fast_floor.hpp>
#include <small_gicp/util/vector3i_hash.hpp> #include <small_gicp/util/vector3i_hash.hpp>
namespace small_gicp { namespace small_gicp {
@ -21,7 +22,7 @@ public:
CachedKdTree(const PointCloud& points, double leaf_size) : inv_leaf_size(1.0 / leaf_size), kdtree(points) {} CachedKdTree(const PointCloud& points, double leaf_size) : inv_leaf_size(1.0 / leaf_size), kdtree(points) {}
size_t knn_search(const Eigen::Vector4d& pt, size_t k, size_t* k_indices, double* k_sq_dists) const { size_t knn_search(const Eigen::Vector4d& pt, size_t k, size_t* k_indices, double* k_sq_dists) const {
const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().cast<int>().head<3>(); const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).head<3>();
CacheTable::const_accessor ca; CacheTable::const_accessor ca;
if (cache.find(ca, coord)) { if (cache.find(ca, coord)) {

View File

@ -5,6 +5,7 @@
#include <memory> #include <memory>
#include <small_gicp/ann/traits.hpp> #include <small_gicp/ann/traits.hpp>
#include <small_gicp/points/traits.hpp> #include <small_gicp/points/traits.hpp>
#include <small_gicp/util/fast_floor.hpp>
#include <small_gicp/util/vector3i_hash.hpp> #include <small_gicp/util/vector3i_hash.hpp>
namespace small_gicp { namespace small_gicp {
@ -89,7 +90,7 @@ public:
v.reserve(k); v.reserve(k);
std::priority_queue<IndexDistance> queue(std::less<IndexDistance>(), std::move(v)); std::priority_queue<IndexDistance> queue(std::less<IndexDistance>(), std::move(v));
const Eigen::Vector3i center_coord = (pt * inv_leaf_size).array().floor().cast<int>().head<3>(); const Eigen::Vector3i center_coord = fast_floor(pt * inv_leaf_size).head<3>();
for (const auto& offset : offsets) { for (const auto& offset : offsets) {
const Eigen::Vector3i coord = center_coord + offset; const Eigen::Vector3i coord = center_coord + offset;
const auto voxel = find_voxel(coord); const auto voxel = find_voxel(coord);
@ -133,7 +134,7 @@ private:
std::vector<Eigen::Vector3i> coords(traits::size(points)); std::vector<Eigen::Vector3i> coords(traits::size(points));
tbb::parallel_for(static_cast<size_t>(0), static_cast<size_t>(traits::size(points)), [&](size_t i) { tbb::parallel_for(static_cast<size_t>(0), static_cast<size_t>(traits::size(points)), [&](size_t i) {
const Eigen::Vector4d pt = traits::point(points, i); const Eigen::Vector4d pt = traits::point(points, i);
const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().template cast<int>().template head<3>(); const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).template head<3>();
coords[i] = coord; coords[i] = coord;
const size_t hash = XORVector3iHash::hash(coord); const size_t hash = XORVector3iHash::hash(coord);

View File

@ -4,6 +4,7 @@
#include <small_gicp/ann/traits.hpp> #include <small_gicp/ann/traits.hpp>
#include <small_gicp/points/traits.hpp> #include <small_gicp/points/traits.hpp>
#include <small_gicp/util/fast_floor.hpp>
#include <small_gicp/util/vector3i_hash.hpp> #include <small_gicp/util/vector3i_hash.hpp>
namespace small_gicp { namespace small_gicp {
@ -78,7 +79,7 @@ public:
// Insert points to the voxelmap // Insert points to the voxelmap
for (size_t i = 0; i < traits::size(points); i++) { for (size_t i = 0; i < traits::size(points); i++) {
const Eigen::Vector4d pt = T * traits::point(points, i); const Eigen::Vector4d pt = T * traits::point(points, i);
const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().cast<int>().head<3>(); const Eigen::Vector3i coord = fast_loor(pt * inv_leaf_size).head<3>();
auto found = voxels.find(coord); auto found = voxels.find(coord);
if (found == voxels.end()) { if (found == voxels.end()) {
@ -110,7 +111,7 @@ public:
/// @brief Find the nearest neighbor /// @brief Find the nearest neighbor
size_t nearest_neighbor_search(const Eigen::Vector4d& pt, size_t* k_index, double* k_sq_dist) const { size_t nearest_neighbor_search(const Eigen::Vector4d& pt, size_t* k_index, double* k_sq_dist) const {
const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().cast<int>().head<3>(); const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).head<3>();
const auto found = voxels.find(coord); const auto found = voxels.find(coord);
if (found == voxels.end()) { if (found == voxels.end()) {
return 0; return 0;

View File

@ -5,6 +5,7 @@
#include <algorithm> #include <algorithm>
#include <filesystem> #include <filesystem>
#include <Eigen/Core> #include <Eigen/Core>
#include <fmt/format.h>
#include <small_gicp/points/traits.hpp> #include <small_gicp/points/traits.hpp>
#include <small_gicp/util/read_points.hpp> #include <small_gicp/util/read_points.hpp>
@ -12,16 +13,55 @@ namespace small_gicp {
struct Stopwatch { struct Stopwatch {
public: public:
void start() { t1 = std::chrono::high_resolution_clock::now(); } void start() { t1 = t2 = std::chrono::high_resolution_clock::now(); }
void stop() { t2 = std::chrono::high_resolution_clock::now(); } void stop() { t2 = std::chrono::high_resolution_clock::now(); }
double elapsed_sec() const { return std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e9; } void lap() {
double elapsed_msec() const { return std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6; } t1 = t2;
t2 = std::chrono::high_resolution_clock::now();
}
double sec() const { return std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e9; }
double msec() const { return std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6; }
public: public:
std::chrono::high_resolution_clock::time_point t1; std::chrono::high_resolution_clock::time_point t1;
std::chrono::high_resolution_clock::time_point t2; std::chrono::high_resolution_clock::time_point t2;
}; };
struct Summarizer {
public:
Summarizer() { results.reserve(8192); }
void push(double x) { results.emplace_back(x); }
std::pair<double, double> mean_std() const {
const double sum = std::accumulate(results.begin(), results.end(), 0.0);
const double sum_sq = std::accumulate(results.begin(), results.end(), 0.0, [](double sum, double x) { return sum + x * x; });
const double mean = sum / results.size();
const double var = (sum_sq - mean * sum) / results.size();
return {mean, std::sqrt(var)};
}
std::string str() const {
const auto [mean, std] = mean_std();
return fmt::format("{:.3f} +- {:.3f}", mean, std);
}
private:
std::vector<double> results;
};
template <typename Container, typename Transform>
std::string summarize(const Container& container, const Transform& transform) {
Summarizer summarizer;
for (auto itr = std::begin(container); itr != std::end(container); itr++) {
summarizer.push(transform(*itr));
}
return summarizer.str();
}
struct KittiDataset { struct KittiDataset {
public: public:
KittiDataset(const std::string& dataset_path, size_t max_num_data = std::numeric_limits<size_t>::max()) { KittiDataset(const std::string& dataset_path, size_t max_num_data = std::numeric_limits<size_t>::max()) {
@ -39,9 +79,8 @@ public:
filenames.resize(max_num_data); filenames.resize(max_num_data);
} }
points.resize(max_num_data); points.resize(filenames.size());
std::ranges::transform(filenames, points.begin(), [](const std::string& filename) { return read_points(filename); }); std::ranges::transform(filenames, points.begin(), [](const std::string& filename) { return read_points(filename); });
std::erase_if(points, [](const auto& p) { return p.empty(); });
} }
template <typename PointCloud> template <typename PointCloud>

View File

@ -4,6 +4,7 @@
#include <random> #include <random>
#include <unordered_map> #include <unordered_map>
#include <small_gicp/points/traits.hpp> #include <small_gicp/points/traits.hpp>
#include <small_gicp/util/fast_floor.hpp>
#include <small_gicp/util/vector3i_hash.hpp> #include <small_gicp/util/vector3i_hash.hpp>
namespace small_gicp { namespace small_gicp {
@ -28,8 +29,7 @@ std::shared_ptr<OutputPointCloud> voxelgrid_sampling(const InputPointCloud& poin
std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size()); std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size());
for (size_t i = 0; i < traits::size(points); i++) { for (size_t i = 0; i < traits::size(points); i++) {
// TODO: Check if coord in 21bit range // TODO: Check if coord in 21bit range
const auto& pt = traits::point(points, i); const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset;
const Eigen::Array4i coord = (pt * inv_leaf_size).array().floor().template cast<int>() + coord_offset;
// Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit) // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit)
const std::uint64_t bits = // const std::uint64_t bits = //

View File

@ -5,6 +5,7 @@
#include <tbb/tbb.h> #include <tbb/tbb.h>
#include <small_gicp/points/traits.hpp> #include <small_gicp/points/traits.hpp>
#include <small_gicp/util/fast_floor.hpp>
#include <small_gicp/util/vector3i_hash.hpp> #include <small_gicp/util/vector3i_hash.hpp>
namespace small_gicp { namespace small_gicp {
@ -28,8 +29,7 @@ std::shared_ptr<OutputPointCloud> voxelgrid_sampling_tbb(const InputPointCloud&
std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size()); std::vector<std::pair<std::uint64_t, size_t>> coord_pt(points.size());
tbb::parallel_for(size_t(0), size_t(traits::size(points)), [&](size_t i) { tbb::parallel_for(size_t(0), size_t(traits::size(points)), [&](size_t i) {
// TODO: Check if coord in 21bit range // TODO: Check if coord in 21bit range
const Eigen::Vector4d pt = traits::point(points, i); const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset;
const Eigen::Array4i coord = (pt * inv_leaf_size).array().floor().template cast<int>() + coord_offset;
// Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit) // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit)
const std::uint64_t bits = // const std::uint64_t bits = //

View File

@ -0,0 +1,15 @@
#pragma once
#include <Eigen/Core>
namespace small_gicp {
/// @brief Fast floor (https://stackoverflow.com/questions/824118/why-is-floor-so-slow).
/// @param pt Double vector
/// @return Floored int vector
inline Eigen::Array4i fast_floor(const Eigen::Array4d& pt) {
const Eigen::Array4i ncoord = pt.cast<int>();
return ncoord - (pt < ncoord.cast<double>()).cast<int>();
};
} // namespace small_gicp

View File

@ -1,3 +1,5 @@
#include <fmt/format.h>
#include <pcl/point_types.h> #include <pcl/point_types.h>
#include <pcl/point_cloud.h> #include <pcl/point_cloud.h>
#include <pcl/filters/voxel_grid.h> #include <pcl/filters/voxel_grid.h>
@ -6,57 +8,79 @@
#include <small_gicp/util/benchmark.hpp> #include <small_gicp/util/benchmark.hpp>
#include <small_gicp/util/downsampling.hpp> #include <small_gicp/util/downsampling.hpp>
#include <small_gicp/util/downsampling_tbb.hpp> #include <small_gicp/util/downsampling_tbb.hpp>
#include <small_gicp/util/normal_estimation_tbb.hpp>
#include <small_gicp/points/point_cloud.hpp> #include <small_gicp/points/point_cloud.hpp>
#include <small_gicp/points/pcl_point.hpp> #include <small_gicp/points/pcl_point.hpp>
#include <small_gicp/points/pcl_point_traits.hpp> #include <small_gicp/points/pcl_point_traits.hpp>
#include <guik/viewer/light_viewer.hpp> namespace small_gicp {
template <typename PointCloudPtr, typename Func>
void benchmark(const std::vector<PointCloudPtr>& raw_points, double leaf_size, const Func& downsample) {
Stopwatch sw;
Summarizer times;
Summarizer num_points;
sw.start();
for (const auto& points : raw_points) {
auto downsampled = downsample(points, leaf_size);
sw.lap();
times.push(sw.msec());
num_points.push(downsampled->size());
}
std::cout << fmt::format("{} [msec/scan] {} [points]", times.str(), num_points.str()) << std::endl;
}
template <typename VoxelGrid, typename PointCloudPtr>
auto downsample_pcl(const PointCloudPtr& points, double leaf_size) {
VoxelGrid voxelgrid;
voxelgrid.setLeafSize(leaf_size, leaf_size, leaf_size);
voxelgrid.setInputCloud(points);
auto downsampled = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
voxelgrid.filter(*downsampled);
return downsampled;
}
} // namespace small_gicp
int main(int argc, char** argv) { int main(int argc, char** argv) {
using namespace small_gicp; using namespace small_gicp;
std::cout << "SIMD=" << Eigen::SimdInstructionSetsInUse() << std::endl; std::cout << "SIMD in use: " << Eigen::SimdInstructionSetsInUse() << std::endl;
const std::string dataset_path = "/home/koide/datasets/velodyne"; const std::string dataset_path = "/home/koide/datasets/velodyne";
std::cout << "Load dataset from " << dataset_path << std::endl; std::cout << "Load dataset from " << dataset_path << std::endl;
Stopwatch sw;
sw.start();
KittiDataset kitti(dataset_path, 1000); KittiDataset kitti(dataset_path, 1000);
sw.stop(); std::cout << "num_frames=" << kitti.points.size() << std::endl;
std::cout << "load=" << sw.elapsed_sec() << "s" << std::endl; std::cout << fmt::format("num_points={} [points]", summarize(kitti.points, [](const auto& pts) { return pts.size(); })) << std::endl;
size_t sum_num_points = 0;
const auto points_pcl = kitti.convert<pcl::PointCloud<pcl::PointXYZ>>(); const auto points_pcl = kitti.convert<pcl::PointCloud<pcl::PointXYZ>>();
sw.start(); std::cout << "---" << std::endl;
sum_num_points = 0; std::cout << "leaf_size=0.1(warmup)" << std::endl;
for (size_t i = 0; i < points_pcl.size(); i++) { std::cout << fmt::format("{:25}: ", "pcl_voxelgrid") << std::flush;
auto downsampled = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>(); benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::VoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
std::cout << fmt::format("{:25}: ", "pcl_approx_voxelgrid") << std::flush;
benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::ApproximateVoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
std::cout << fmt::format("{:25}: ", "small_voxelgrid") << std::flush;
benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); });
std::cout << fmt::format("{:25}: ", "small_voxelgrid_tbb") << std::flush;
benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); });
pcl::VoxelGrid<pcl::PointXYZ> voxelgrid; for (double leaf_size = 1.0; leaf_size <= 1.51; leaf_size += 0.1) {
voxelgrid.setLeafSize(0.2, 0.2, 0.2); std::cout << "---" << std::endl;
voxelgrid.setInputCloud(points_pcl[i]); std::cout << "leaf_size=" << leaf_size << std::endl;
std::cout << fmt::format("{:25}: ", "pcl_voxelgrid") << std::flush;
voxelgrid.filter(*downsampled); benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::VoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
sum_num_points += downsampled->size(); std::cout << fmt::format("{:25}: ", "pcl_approx_voxelgrid") << std::flush;
benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return downsample_pcl<pcl::ApproximateVoxelGrid<pcl::PointXYZ>>(points, leaf_size); });
std::cout << fmt::format("{:25}: ", "small_voxelgrid") << std::flush;
benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); });
std::cout << fmt::format("{:25}: ", "small_voxelgrid_tbb") << std::flush;
benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); });
} }
sw.stop();
std::cout << "filter=" << sw.elapsed_sec() << "s avg_num_points=" << static_cast<double>(sum_num_points) / points_pcl.size() << std::endl;
const auto points = kitti.convert<PointCloud>();
sw.start();
sum_num_points = 0;
for (size_t i = 0; i < points.size(); i++) {
auto downsampled = voxelgrid_sampling_tbb(*points[i], 0.2);
sum_num_points += downsampled->size();
}
sw.stop();
std::cout << "filter=" << sw.elapsed_sec() << "s avg_num_points=" << static_cast<double>(sum_num_points) / points.size() << std::endl;
return 0; return 0;
} }

View File

@ -46,7 +46,7 @@ int main(int argc, char** argv) {
prof.push("copy"); prof.push("copy");
auto points = std::make_shared<PointCloud>(raw_points); auto points = std::make_shared<PointCloud>(raw_points);
prof.push("downsample"); prof.push("downsample");
auto downsampled = approx_voxelgrid_sampling_tbb(*points, 0.2); auto downsampled = voxelgrid_sampling_tbb(*points, 0.2);
prof.push("estimate covs"); prof.push("estimate covs");
estimate_covariances_omp(*downsampled, 10); estimate_covariances_omp(*downsampled, 10);
prof.push("create flat voxels"); prof.push("create flat voxels");