From 2b101cda7a45c004767792d0003b253919c5133b Mon Sep 17 00:00:00 2001 From: "k.koide" Date: Sun, 24 Mar 2024 17:44:20 +0900 Subject: [PATCH] fast_floor --- CMakeLists.txt | 21 ++++- include/small_gicp/ann/cached_kdtree.hpp | 3 +- include/small_gicp/ann/flat_voxelmap.hpp | 5 +- include/small_gicp/ann/gaussian_voxelmap.hpp | 5 +- include/small_gicp/util/benchmark.hpp | 49 +++++++++-- include/small_gicp/util/downsampling.hpp | 4 +- include/small_gicp/util/downsampling_tbb.hpp | 4 +- include/small_gicp/util/fast_floor.hpp | 15 ++++ src/downsampling_benchmark.cpp | 92 ++++++++++++-------- src/downsampling_test.cpp | 2 +- 10 files changed, 149 insertions(+), 51 deletions(-) create mode 100644 include/small_gicp/util/fast_floor.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fd43534..b8e1c94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ if(BUILD_WITH_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "-march=native ${CMAKE_CXX_FLAGS}") endif() +find_package(fmt REQUIRED) find_package(PCL REQUIRED) find_package(TBB REQUIRED) find_package(Eigen3 REQUIRED) @@ -49,6 +50,23 @@ endif() # ${Iridescence_LIBRARIES} # ) +add_executable(downsampling_test + src/downsampling_test.cpp +) +target_include_directories(downsampling_test PUBLIC + include + ${PCL_INCLUDE_DIRS} + ${TBB_INCLUDE_DIRS} + ${EIGEN3_INCLUDE_DIR} + ${Iridescence_INCLUDE_DIRS} +) +target_link_libraries(downsampling_test + fmt::fmt + ${PCL_LIBRARIES} + ${TBB_LIBRARIES} + ${Iridescence_LIBRARIES} +) + add_executable(downsampling_benchmark src/downsampling_benchmark.cpp ) @@ -57,10 +75,9 @@ target_include_directories(downsampling_benchmark PUBLIC ${PCL_INCLUDE_DIRS} ${TBB_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR} - ${Iridescence_INCLUDE_DIRS} ) target_link_libraries(downsampling_benchmark + fmt::fmt ${PCL_LIBRARIES} ${TBB_LIBRARIES} - ${Iridescence_LIBRARIES} ) \ No newline at end of file diff --git a/include/small_gicp/ann/cached_kdtree.hpp b/include/small_gicp/ann/cached_kdtree.hpp index af6080a..feaad46 100644 --- a/include/small_gicp/ann/cached_kdtree.hpp +++ b/include/small_gicp/ann/cached_kdtree.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace small_gicp { @@ -21,7 +22,7 @@ public: CachedKdTree(const PointCloud& points, double leaf_size) : inv_leaf_size(1.0 / leaf_size), kdtree(points) {} size_t knn_search(const Eigen::Vector4d& pt, size_t k, size_t* k_indices, double* k_sq_dists) const { - const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().cast().head<3>(); + const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).head<3>(); CacheTable::const_accessor ca; if (cache.find(ca, coord)) { diff --git a/include/small_gicp/ann/flat_voxelmap.hpp b/include/small_gicp/ann/flat_voxelmap.hpp index ec2b475..a0731c8 100644 --- a/include/small_gicp/ann/flat_voxelmap.hpp +++ b/include/small_gicp/ann/flat_voxelmap.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace small_gicp { @@ -89,7 +90,7 @@ public: v.reserve(k); std::priority_queue queue(std::less(), std::move(v)); - const Eigen::Vector3i center_coord = (pt * inv_leaf_size).array().floor().cast().head<3>(); + const Eigen::Vector3i center_coord = fast_floor(pt * inv_leaf_size).head<3>(); for (const auto& offset : offsets) { const Eigen::Vector3i coord = center_coord + offset; const auto voxel = find_voxel(coord); @@ -133,7 +134,7 @@ private: std::vector coords(traits::size(points)); tbb::parallel_for(static_cast(0), static_cast(traits::size(points)), [&](size_t i) { const Eigen::Vector4d pt = traits::point(points, i); - const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().template cast().template head<3>(); + const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).template head<3>(); coords[i] = coord; const size_t hash = XORVector3iHash::hash(coord); diff --git a/include/small_gicp/ann/gaussian_voxelmap.hpp b/include/small_gicp/ann/gaussian_voxelmap.hpp index ff44cc4..59e8e4f 100644 --- a/include/small_gicp/ann/gaussian_voxelmap.hpp +++ b/include/small_gicp/ann/gaussian_voxelmap.hpp @@ -4,6 +4,7 @@ #include #include +#include #include namespace small_gicp { @@ -78,7 +79,7 @@ public: // Insert points to the voxelmap for (size_t i = 0; i < traits::size(points); i++) { const Eigen::Vector4d pt = T * traits::point(points, i); - const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().cast().head<3>(); + const Eigen::Vector3i coord = fast_loor(pt * inv_leaf_size).head<3>(); auto found = voxels.find(coord); if (found == voxels.end()) { @@ -110,7 +111,7 @@ public: /// @brief Find the nearest neighbor size_t nearest_neighbor_search(const Eigen::Vector4d& pt, size_t* k_index, double* k_sq_dist) const { - const Eigen::Vector3i coord = (pt * inv_leaf_size).array().floor().cast().head<3>(); + const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).head<3>(); const auto found = voxels.find(coord); if (found == voxels.end()) { return 0; diff --git a/include/small_gicp/util/benchmark.hpp b/include/small_gicp/util/benchmark.hpp index dfa96da..67ee0b2 100644 --- a/include/small_gicp/util/benchmark.hpp +++ b/include/small_gicp/util/benchmark.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -12,16 +13,55 @@ namespace small_gicp { struct Stopwatch { public: - void start() { t1 = std::chrono::high_resolution_clock::now(); } + void start() { t1 = t2 = std::chrono::high_resolution_clock::now(); } void stop() { t2 = std::chrono::high_resolution_clock::now(); } - double elapsed_sec() const { return std::chrono::duration_cast(t2 - t1).count() / 1e9; } - double elapsed_msec() const { return std::chrono::duration_cast(t2 - t1).count() / 1e6; } + void lap() { + t1 = t2; + t2 = std::chrono::high_resolution_clock::now(); + } + + double sec() const { return std::chrono::duration_cast(t2 - t1).count() / 1e9; } + double msec() const { return std::chrono::duration_cast(t2 - t1).count() / 1e6; } public: std::chrono::high_resolution_clock::time_point t1; std::chrono::high_resolution_clock::time_point t2; }; +struct Summarizer { +public: + Summarizer() { results.reserve(8192); } + + void push(double x) { results.emplace_back(x); } + + std::pair mean_std() const { + const double sum = std::accumulate(results.begin(), results.end(), 0.0); + const double sum_sq = std::accumulate(results.begin(), results.end(), 0.0, [](double sum, double x) { return sum + x * x; }); + + const double mean = sum / results.size(); + const double var = (sum_sq - mean * sum) / results.size(); + + return {mean, std::sqrt(var)}; + } + + std::string str() const { + const auto [mean, std] = mean_std(); + return fmt::format("{:.3f} +- {:.3f}", mean, std); + } + +private: + std::vector results; +}; + +template +std::string summarize(const Container& container, const Transform& transform) { + Summarizer summarizer; + for (auto itr = std::begin(container); itr != std::end(container); itr++) { + summarizer.push(transform(*itr)); + } + return summarizer.str(); +} + struct KittiDataset { public: KittiDataset(const std::string& dataset_path, size_t max_num_data = std::numeric_limits::max()) { @@ -39,9 +79,8 @@ public: filenames.resize(max_num_data); } - points.resize(max_num_data); + points.resize(filenames.size()); std::ranges::transform(filenames, points.begin(), [](const std::string& filename) { return read_points(filename); }); - std::erase_if(points, [](const auto& p) { return p.empty(); }); } template diff --git a/include/small_gicp/util/downsampling.hpp b/include/small_gicp/util/downsampling.hpp index 1fd62d3..824a9c0 100644 --- a/include/small_gicp/util/downsampling.hpp +++ b/include/small_gicp/util/downsampling.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace small_gicp { @@ -28,8 +29,7 @@ std::shared_ptr voxelgrid_sampling(const InputPointCloud& poin std::vector> coord_pt(points.size()); for (size_t i = 0; i < traits::size(points); i++) { // TODO: Check if coord in 21bit range - const auto& pt = traits::point(points, i); - const Eigen::Array4i coord = (pt * inv_leaf_size).array().floor().template cast() + coord_offset; + const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset; // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit) const std::uint64_t bits = // diff --git a/include/small_gicp/util/downsampling_tbb.hpp b/include/small_gicp/util/downsampling_tbb.hpp index 05c6043..e52f0d1 100644 --- a/include/small_gicp/util/downsampling_tbb.hpp +++ b/include/small_gicp/util/downsampling_tbb.hpp @@ -5,6 +5,7 @@ #include #include +#include #include namespace small_gicp { @@ -28,8 +29,7 @@ std::shared_ptr voxelgrid_sampling_tbb(const InputPointCloud& std::vector> coord_pt(points.size()); tbb::parallel_for(size_t(0), size_t(traits::size(points)), [&](size_t i) { // TODO: Check if coord in 21bit range - const Eigen::Vector4d pt = traits::point(points, i); - const Eigen::Array4i coord = (pt * inv_leaf_size).array().floor().template cast() + coord_offset; + const Eigen::Array4i coord = fast_floor(traits::point(points, i) * inv_leaf_size) + coord_offset; // Compute voxel coord bits (0|1bit, z|21bit, y|21bit, x|21bit) const std::uint64_t bits = // diff --git a/include/small_gicp/util/fast_floor.hpp b/include/small_gicp/util/fast_floor.hpp new file mode 100644 index 0000000..691c309 --- /dev/null +++ b/include/small_gicp/util/fast_floor.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace small_gicp { + +/// @brief Fast floor (https://stackoverflow.com/questions/824118/why-is-floor-so-slow). +/// @param pt Double vector +/// @return Floored int vector +inline Eigen::Array4i fast_floor(const Eigen::Array4d& pt) { + const Eigen::Array4i ncoord = pt.cast(); + return ncoord - (pt < ncoord.cast()).cast(); +}; + +} // namespace small_gicp diff --git a/src/downsampling_benchmark.cpp b/src/downsampling_benchmark.cpp index f1fed85..63b2dd4 100644 --- a/src/downsampling_benchmark.cpp +++ b/src/downsampling_benchmark.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -6,57 +8,79 @@ #include #include #include -#include #include #include #include -#include +namespace small_gicp { + +template +void benchmark(const std::vector& raw_points, double leaf_size, const Func& downsample) { + Stopwatch sw; + Summarizer times; + Summarizer num_points; + + sw.start(); + for (const auto& points : raw_points) { + auto downsampled = downsample(points, leaf_size); + + sw.lap(); + times.push(sw.msec()); + num_points.push(downsampled->size()); + } + + std::cout << fmt::format("{} [msec/scan] {} [points]", times.str(), num_points.str()) << std::endl; +} + +template +auto downsample_pcl(const PointCloudPtr& points, double leaf_size) { + VoxelGrid voxelgrid; + voxelgrid.setLeafSize(leaf_size, leaf_size, leaf_size); + voxelgrid.setInputCloud(points); + + auto downsampled = pcl::make_shared>(); + voxelgrid.filter(*downsampled); + return downsampled; +} + +} // namespace small_gicp int main(int argc, char** argv) { using namespace small_gicp; - std::cout << "SIMD=" << Eigen::SimdInstructionSetsInUse() << std::endl; - + std::cout << "SIMD in use: " << Eigen::SimdInstructionSetsInUse() << std::endl; const std::string dataset_path = "/home/koide/datasets/velodyne"; std::cout << "Load dataset from " << dataset_path << std::endl; - - Stopwatch sw; - sw.start(); KittiDataset kitti(dataset_path, 1000); - sw.stop(); - std::cout << "load=" << sw.elapsed_sec() << "s" << std::endl; - - size_t sum_num_points = 0; + std::cout << "num_frames=" << kitti.points.size() << std::endl; + std::cout << fmt::format("num_points={} [points]", summarize(kitti.points, [](const auto& pts) { return pts.size(); })) << std::endl; const auto points_pcl = kitti.convert>(); - sw.start(); - sum_num_points = 0; - for (size_t i = 0; i < points_pcl.size(); i++) { - auto downsampled = pcl::make_shared>(); + std::cout << "---" << std::endl; + std::cout << "leaf_size=0.1(warmup)" << std::endl; + std::cout << fmt::format("{:25}: ", "pcl_voxelgrid") << std::flush; + benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return downsample_pcl>(points, leaf_size); }); + std::cout << fmt::format("{:25}: ", "pcl_approx_voxelgrid") << std::flush; + benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return downsample_pcl>(points, leaf_size); }); + std::cout << fmt::format("{:25}: ", "small_voxelgrid") << std::flush; + benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); }); + std::cout << fmt::format("{:25}: ", "small_voxelgrid_tbb") << std::flush; + benchmark(points_pcl, 0.1, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); }); - pcl::VoxelGrid voxelgrid; - voxelgrid.setLeafSize(0.2, 0.2, 0.2); - voxelgrid.setInputCloud(points_pcl[i]); - - voxelgrid.filter(*downsampled); - sum_num_points += downsampled->size(); + for (double leaf_size = 1.0; leaf_size <= 1.51; leaf_size += 0.1) { + std::cout << "---" << std::endl; + std::cout << "leaf_size=" << leaf_size << std::endl; + std::cout << fmt::format("{:25}: ", "pcl_voxelgrid") << std::flush; + benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return downsample_pcl>(points, leaf_size); }); + std::cout << fmt::format("{:25}: ", "pcl_approx_voxelgrid") << std::flush; + benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return downsample_pcl>(points, leaf_size); }); + std::cout << fmt::format("{:25}: ", "small_voxelgrid") << std::flush; + benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return voxelgrid_sampling(*points, leaf_size); }); + std::cout << fmt::format("{:25}: ", "small_voxelgrid_tbb") << std::flush; + benchmark(points_pcl, leaf_size, [](const auto& points, double leaf_size) { return voxelgrid_sampling_tbb(*points, leaf_size); }); } - sw.stop(); - std::cout << "filter=" << sw.elapsed_sec() << "s avg_num_points=" << static_cast(sum_num_points) / points_pcl.size() << std::endl; - - const auto points = kitti.convert(); - - sw.start(); - sum_num_points = 0; - for (size_t i = 0; i < points.size(); i++) { - auto downsampled = voxelgrid_sampling_tbb(*points[i], 0.2); - sum_num_points += downsampled->size(); - } - sw.stop(); - std::cout << "filter=" << sw.elapsed_sec() << "s avg_num_points=" << static_cast(sum_num_points) / points.size() << std::endl; return 0; } diff --git a/src/downsampling_test.cpp b/src/downsampling_test.cpp index 3b84d3e..e6afc0d 100644 --- a/src/downsampling_test.cpp +++ b/src/downsampling_test.cpp @@ -46,7 +46,7 @@ int main(int argc, char** argv) { prof.push("copy"); auto points = std::make_shared(raw_points); prof.push("downsample"); - auto downsampled = approx_voxelgrid_sampling_tbb(*points, 0.2); + auto downsampled = voxelgrid_sampling_tbb(*points, 0.2); prof.push("estimate covs"); estimate_covariances_omp(*downsampled, 10); prof.push("create flat voxels");