update benchmark_kdtree

This commit is contained in:
k.koide 2024-04-01 20:40:30 +09:00
parent fe336935fb
commit 734bc905c8
6 changed files with 107 additions and 67 deletions

View File

@ -5,8 +5,8 @@
- **Highly Optimized** : The implementation of the core registration algorithm is further optimized from that in fast_gicp. It enables up to **2x speed gain** compared to fast_gicp.
- **All parallerized** : small_gicp offers parallelized implementations of several preprocessing algorithms to make the entire registration process parallelized (Downsampling, KdTree construction, Normal/covariance estimation). As a parallelism backend, either (or both) of [OpenMP](https://www.openmp.org/) and [Intel TBB](https://github.com/oneapi-src/oneTBB) can be used.
- **Minimum dependency** : Only [Eigen](https://eigen.tuxfamily.org/) (and bundled [nanoflann](https://github.com/jlblancoc/nanoflann) and [Sophus](https://github.com/strasdat/Sophus)) are required at a minimum. Optionally, it provides the [PCL](https://pointclouds.org/) registration interface so that it can be used as a drop-in replacement in many systems.
- **Customizable** : small_gicp is implemented with the trait mechanism that enables feeding any custom point cloud class to the registration algorithm. Furthermore, the template-based implementation allows customizing the regisration process with your original correspondence estimator and registration factors.
- **Python bindinds** : The isolation from PCL makes small_gicp's python bindinds more portable and connectable to other libraries seamlessly.
- **Customizable** : small_gicp allows feeding any custom point cloud class to the registration algorithm via traits. Furthermore, the template-based implementation enables customizing the regisration process with your original correspondence estimator and registration factors.
- **Python bindinds** : The isolation from PCL makes small_gicp's python bindinds more portable and connectable to other libraries (e.g., Open3D) without problems.
Note that GPU-based implementations are NOT included in this package.
@ -27,7 +27,7 @@ This library uses some C++20 features. While porting it to older environments sh
small_gicp is a header-only library. You can just download and drop it in your project directory to use it.
Meanwhile, if you need only basic point cloud registration, you can build and install the helper library as follows.
If you need only basic point cloud registration functions, you can build and install the helper library as follows.
```bash
sudo apt-get install libeigen3-dev libomp-dev
@ -60,7 +60,7 @@ The following examples assume `using namespace small_gicp` is placed somewhere.
The helper library (`registration_helper.hpp`) enables easily processing point clouds represented as `std::vector<Eigen::Vector(3|4)(f|d)>`.
<details><summary>Expand</summary>
`small_gicp::align` takes two point clouds (`std::vectors` of `Eigen::Vector(3|4)(f|d)`) and returns a registration result (estimated transformation and some information on the optimization result). This is the easiest way to use **small_gicp** but causes an overhead for duplicated preprocessing.
`small_gicp::align` takes two point clouds (`std::vectors` of `Eigen::Vector(3|4)(f|d)`) and returns a registration result (estimated transformation and some information on the optimization result). This is the easiest way to use small_gicp but causes an overhead for duplicated preprocessing.
```cpp
#include <small_gicp/registration/registration_helper.hpp>
@ -111,9 +111,9 @@ Eigen::Matrix<double, 6, 6> H = result.H; // Final Hessian matrix (6x6)
</details>
### Using with PCL interface ([02_basic_resigtration_pcl.cpp](src/example/02_basic_resigtration_pcl.cpp))
### Using PCL interface ([02_basic_resigtration_pcl.cpp](src/example/02_basic_resigtration_pcl.cpp))
The PCL interface allows using small_gicp as a drop-in replacement for `pcl::GeneralizedIterativeClosestPoint`. It is also possible to directly feed `pcl::PointCloud` to algorithms implemented in small_gicp.
The PCL interface allows using small_gicp as a drop-in replacement for `pcl::Registration`. It is also possible to directly feed `pcl::PointCloud` to algorithms implemented in small_gicp.
<details><summary>Expand</summary>
@ -233,7 +233,7 @@ size_t num_inliers = result.num_inliers; // Number of inlier source points
Eigen::Matrix<double, 6, 6> H = result.H; // Final Hessian matrix (6x6)
```
See [03_registration_template.cpp](src/example/03_registration_template.cpp) for more detailed customization example.
See [03_registration_template.cpp](src/example/03_registration_template.cpp) for more detailed customization examples.
</details>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 241 KiB

After

Width:  |  Height:  |  Size: 234 KiB

View File

@ -3,6 +3,7 @@
#pragma once
#include <chrono>
#include <deque>
#include <vector>
#include <numeric>
#include <algorithm>
@ -33,13 +34,15 @@ public:
struct Summarizer {
public:
Summarizer() : num_data(0), sum(0.0), sq_sum(0.0), last_x(0.0) {}
Summarizer(bool full_log = false) : full_log(full_log), num_data(0), sum(0.0), sq_sum(0.0), last_x(0.0) {}
void push(double x) {
num_data++;
sum += x;
sq_sum += x * x;
last_x = x;
full_data.emplace_back(x);
}
std::pair<double, double> mean_std() const {
@ -48,14 +51,33 @@ public:
return {mean, std::sqrt(var)};
}
double median() const {
if (!full_log || full_data.empty()) {
return std::nan("");
}
std::vector<double> sorted(full_data.begin(), full_data.end());
std::ranges::nth_element(sorted, sorted.begin() + sorted.size() / 2);
return sorted[sorted.size() / 2];
}
double last() const { return last_x; }
std::string str() const {
if (full_log) {
const auto [mean, std] = mean_std();
const double med = median();
return fmt::format("{:.3f} +- {:.3f} (median={:.3f})", mean, std, med);
}
const auto [mean, std] = mean_std();
return fmt::format("{:.3f} +- {:.3f} (last={:.3f})", mean, std, last_x);
}
private:
bool full_log;
std::deque<double> full_data;
size_t num_data;
double sum;
double sq_sum;

View File

@ -38,32 +38,35 @@ def parse_result(filename):
def main():
results_path = os.path.dirname(__file__) + '/results'
results = []
results = {}
for filename in os.listdir(results_path):
matched = re.match(r'kdtree_benchmark_(\d+).txt', filename)
matched = re.match(r'kdtree_benchmark_(\S+)_(\d+).txt', filename)
if not matched:
continue
results.append(parse_result(results_path + '/' + filename))
method = '{}_{}'.format(matched.group(1), matched.group(2))
print(method)
results = sorted(results, key=lambda x: x[0])
num_threads = [x[0] for x in results]
num_points = results[0][1]
num_threads, num_points, rets = parse_result(results_path + '/' + filename)
results[method] = rets[list(rets.keys())[0]]
fig, axes = pyplot.subplots(1, 2, figsize=(12, 2))
fig, axes = pyplot.subplots(1, 2, figsize=(12, 3))
axes[0].plot(num_points, results[0][2]['kdtree'], label='kdtree (nanoflann)', marker='o', linestyle='--')
num_threads = [1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128]
axes[0].plot(num_points, results['small_1'], label='kdtree (nanoflann)', marker='o', linestyle='--')
for idx in [1, 3, 5, 7, 8]:
N = num_threads[idx]
axes[0].plot(num_points, results[idx][2]['kdtree_omp'], label='kdtree_omp (%d threads)' % N, marker='s')
axes[0].plot(num_points, results[idx][2]['kdtree_tbb'], label='kdtree_tbb (%d threads)' % N, marker='^')
axes[0].plot(num_points, results['omp_{}'.format(N)], label='kdtree_omp (%d threads)' % N, marker='s')
axes[0].plot(num_points, results['tbb_{}'.format(N)], label='kdtree_tbb (%d threads)' % N, marker='^')
baseline = numpy.array(results[0][2]['kdtree'])
baseline = numpy.array(results['small_1'])
axes[1].plot([num_threads[0], num_threads[-1]], [1.0, 1.0], label='kdtree (nanoflann)', linestyle='--')
for idx in [5]:
threads = num_threads[idx]
N = num_points[idx]
axes[1].plot(num_threads, baseline[idx] / [x[2]['kdtree_omp'][idx] for x in results], label='omp (num_points=%d)' % N, marker='s')
axes[1].plot(num_threads, baseline[idx] / [x[2]['kdtree_tbb'][idx] for x in results], label='tbb (num_points=%d)' % N, marker='^')
axes[1].plot(num_threads, baseline[idx] / [results['omp_{}'.format(threads)][idx] for threads in num_threads], label='omp (num_points=%d)' % N, marker='s')
axes[1].plot(num_threads, baseline[idx] / [results['tbb_{}'.format(threads)][idx] for threads in num_threads], label='tbb (num_points=%d)' % N, marker='^')
axes[1].set_xscale('log')

View File

@ -5,8 +5,14 @@ exe_path=../build/kdtree_benchmark
mkdir results
num_threads=(1 2 3 4 5 6 7 8 16 32 64 128)
$exe_path $dataset_path --num_threads 1 --num_trials 1000 --method small | tee results/kdtree_benchmark_small_$N.txt
for N in ${num_threads[@]}; do
sleep 1
echo $exe_path $dataset_path --num_threads $N | tee results/kdtree_benchmark_$N.txt
$exe_path $dataset_path --num_threads $N --num_trials 1000 | tee results/kdtree_benchmark_$N.txt
$exe_path $dataset_path --num_threads $N --num_trials 1000 --method tbb | tee results/kdtree_benchmark_tbb_$N.txt
done
for N in ${num_threads[@]}; do
sleep 1
$exe_path $dataset_path --num_threads $N --num_trials 1000 --method omp | tee results/kdtree_benchmark_omp_$N.txt
done

View File

@ -14,7 +14,7 @@ int main(int argc, char** argv) {
std::cout << "USAGE: kdtree_benchmark <dataset_path>" << std::endl;
std::cout << "OPTIONS:" << std::endl;
std::cout << " --num_threads <value> (default: 4)" << std::endl;
std::cout << " --downsampling_resolution <value> (default: 0.25)" << std::endl;
std::cout << " --method <str> (small|tbb|omp)" << std::endl;
return 0;
}
@ -22,13 +22,16 @@ int main(int argc, char** argv) {
int num_threads = 4;
int num_trials = 100;
std::string method = "small";
for (int i = 0; i < argc; i++) {
for (int i = 1; i < argc; i++) {
const std::string arg = argv[i];
if (arg == "--num_threads") {
num_threads = std::stoi(argv[i + 1]);
} else if (arg == "--num_trials") {
num_trials = std::stoi(argv[i + 1]);
} else if (arg == "--method") {
method = argv[i + 1];
} else if (arg.size() >= 2 && arg.substr(0, 2) == "--") {
std::cerr << "unknown option: " << arg << std::endl;
return 1;
@ -38,6 +41,7 @@ int main(int argc, char** argv) {
std::cout << "dataset_path=" << dataset_path << std::endl;
std::cout << "num_threads=" << num_threads << std::endl;
std::cout << "num_trials=" << num_trials << std::endl;
std::cout << "method=" << method << std::endl;
tbb::global_control tbb_control(tbb::global_control::max_allowed_parallelism, num_threads);
@ -93,55 +97,60 @@ int main(int argc, char** argv) {
std::cout << "---" << std::endl;
// warmup
for (int i = 0; i < 10; i++) {
auto t1 = std::chrono::high_resolution_clock::now();
while (std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now() - t1).count() < 1) {
auto downsampled = voxelgrid_sampling(*raw_points, 0.1);
UnsafeKdTree<PointCloud> tree(*downsampled);
UnsafeKdTreeTBB<PointCloud> tree_tbb(*downsampled);
UnsafeKdTreeOMP<PointCloud> tree_omp(*downsampled, num_threads);
if (method == "small") {
UnsafeKdTree<PointCloud> tree(*downsampled);
} else if (method == "tbb") {
UnsafeKdTreeTBB<PointCloud> tree(*downsampled);
} else if (method == "omp") {
UnsafeKdTreeOMP<PointCloud> tree(*downsampled, num_threads);
}
}
Stopwatch sw;
for (size_t i = 0; i < downsampling_resolutions.size(); i++) {
if (num_threads != 1) {
break;
if (method == "small") {
for (size_t i = 0; i < downsampling_resolutions.size(); i++) {
if (num_threads != 1) {
break;
}
Summarizer kdtree_times(true);
sw.start();
for (size_t j = 0; j < num_trials; j++) {
UnsafeKdTree<PointCloud> tree(*downsampled[i]);
sw.lap();
kdtree_times.push(sw.msec());
}
std::cout << "kdtree_times=" << kdtree_times.str() << " [msec]" << std::endl;
}
} else if (method == "tbb") {
for (size_t i = 0; i < downsampling_resolutions.size(); i++) {
Summarizer kdtree_tbb_times(true);
sw.start();
for (size_t j = 0; j < num_trials; j++) {
UnsafeKdTreeTBB<PointCloud> tree(*downsampled[i]);
sw.lap();
kdtree_tbb_times.push(sw.msec());
}
Summarizer kdtree_times;
sw.start();
for (size_t j = 0; j < num_trials; j++) {
UnsafeKdTree<PointCloud> tree(*downsampled[i]);
sw.lap();
kdtree_times.push(sw.msec());
std::cout << "kdtree_tbb_times=" << kdtree_tbb_times.str() << " [msec]" << std::endl;
}
std::cout << "kdtree_times=" << kdtree_times.str() << " [msec]" << std::endl;
}
} else if (method == "omp") {
for (size_t i = 0; i < downsampling_resolutions.size(); i++) {
Summarizer kdtree_omp_times(true);
sw.start();
for (size_t j = 0; j < num_trials; j++) {
UnsafeKdTreeOMP<PointCloud> tree(*downsampled[i], num_threads);
sw.lap();
kdtree_omp_times.push(sw.msec());
}
std::cout << "---" << std::endl;
for (size_t i = 0; i < downsampling_resolutions.size(); i++) {
Summarizer kdtree_tbb_times;
sw.start();
for (size_t j = 0; j < num_trials; j++) {
UnsafeKdTreeTBB<PointCloud> tree(*downsampled[i]);
sw.lap();
kdtree_tbb_times.push(sw.msec());
std::cout << "kdtree_omp_times=" << kdtree_omp_times.str() << " [msec]" << std::endl;
}
std::cout << "kdtree_tbb_times=" << kdtree_tbb_times.str() << " [msec]" << std::endl;
}
std::cout << "---" << std::endl;
for (size_t i = 0; i < downsampling_resolutions.size(); i++) {
Summarizer kdtree_omp_times;
sw.start();
for (size_t j = 0; j < num_trials; j++) {
UnsafeKdTreeOMP<PointCloud> tree(*downsampled[i], num_threads);
sw.lap();
kdtree_omp_times.push(sw.msec());
}
std::cout << "kdtree_omp_times=" << kdtree_omp_times.str() << " [msec]" << std::endl;
}
return 0;