diff --git a/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp index ed06bc9..d532612 100644 --- a/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_ct_gicp_factor_impl.hpp @@ -5,6 +5,12 @@ #include #include +#include +#include + +#ifdef GTSAM_POINTS_TBB +#include +#endif namespace gtsam_points { @@ -46,12 +52,16 @@ double IntegratedCT_GICPFactor_::error(const gtsam::Va this->update_correspondences(); } - double sum_errors = 0.0; -#pragma omp parallel for reduction(+ : sum_errors) schedule(guided, 8) num_threads(this->num_threads) - for (int i = 0; i < frame::size(*this->source); i++) { + const auto perpoint_task = [&]( + int i, + Eigen::Matrix* H_target, + Eigen::Matrix* H_source, + Eigen::Matrix* H_target_source, + Eigen::Matrix* b_target, + Eigen::Matrix* b_source) { const long target_index = this->correspondences[i]; if (target_index < 0) { - continue; + return 0.0; } const int time_index = this->time_indices[i]; @@ -60,13 +70,18 @@ double IntegratedCT_GICPFactor_::error(const gtsam::Va const auto& source_pt = frame::point(*this->source, i); const auto& target_pt = frame::point(*this->target, target_index); - Eigen::Vector4d transed_source_pt = pose * source_pt; - Eigen::Vector4d error = transed_source_pt - target_pt; + const Eigen::Vector4d transed_source_pt = pose * source_pt; + const Eigen::Vector4d residual = transed_source_pt - target_pt; + const double error = 0.5 * residual.transpose() * mahalanobis[i] * residual; - sum_errors += 0.5 * error.transpose() * mahalanobis[i] * error; - } + return error; + }; - return sum_errors; + if (is_omp_default()) { + return scan_matching_reduce_omp(perpoint_task, frame::size(*this->source), this->num_threads, nullptr, nullptr, nullptr, nullptr, nullptr); + } else { + return scan_matching_reduce_tbb(perpoint_task, frame::size(*this->source), nullptr, nullptr, nullptr, nullptr, nullptr); + } } template @@ -74,24 +89,16 @@ boost::shared_ptr IntegratedCT_GICPFactor_update_poses(values); this->update_correspondences(); - double sum_errors = 0.0; - std::vector Hs_00(this->num_threads, gtsam::Matrix6::Zero()); - std::vector Hs_01(this->num_threads, gtsam::Matrix6::Zero()); - std::vector Hs_11(this->num_threads, gtsam::Matrix6::Zero()); - std::vector bs_0(this->num_threads, gtsam::Vector6::Zero()); - std::vector bs_1(this->num_threads, gtsam::Vector6::Zero()); - - gtsam::Matrix6 H_00 = gtsam::Matrix6::Zero(); - gtsam::Matrix6 H_01 = gtsam::Matrix6::Zero(); - gtsam::Matrix6 H_11 = gtsam::Matrix6::Zero(); - gtsam::Vector6 b_0 = gtsam::Vector6::Zero(); - gtsam::Vector6 b_1 = gtsam::Vector6::Zero(); - -#pragma omp parallel for reduction(+ : sum_errors) schedule(guided, 8) num_threads(this->num_threads) - for (int i = 0; i < frame::size(*this->source); i++) { + const auto perpoint_task = [&]( + int i, + Eigen::Matrix* H_00, + Eigen::Matrix* H_11, + Eigen::Matrix* H_01, + Eigen::Matrix* b_0, + Eigen::Matrix* b_1) { const long target_index = this->correspondences[i]; if (target_index < 0) { - continue; + return 0.0; } const int time_index = this->time_indices[i]; @@ -108,39 +115,40 @@ boost::shared_ptr IntegratedCT_GICPFactor_(0, 3) = pose.linear(); const Eigen::Vector4d transed_source_pt = pose * source_pt; - const auto& H_error_pose = H_transed_pose; - const Eigen::Vector4d error = transed_source_pt - target_pt; - - const gtsam::Matrix46 H_0 = H_error_pose * H_pose_0; - const gtsam::Matrix46 H_1 = H_error_pose * H_pose_1; + const auto& H_residual_pose = H_transed_pose; + const Eigen::Vector4d residual = transed_source_pt - target_pt; - int thread_num = 0; -#ifdef _OPENMP - thread_num = omp_get_thread_num(); -#endif + const gtsam::Matrix46 H_0 = H_residual_pose * H_pose_0; + const gtsam::Matrix46 H_1 = H_residual_pose * H_pose_1; - const gtsam::Vector4 mahalanobis_error = mahalanobis[i] * error; + const gtsam::Vector4 mahalanobis_residual = mahalanobis[i] * residual; const gtsam::Matrix64 H_0_mahalanobis = H_0.transpose() * mahalanobis[i]; const gtsam::Matrix64 H_1_mahalanobis = H_1.transpose() * mahalanobis[i]; - sum_errors += 0.5 * error.transpose() * mahalanobis_error; - Hs_00[thread_num] += H_0_mahalanobis * H_0; - Hs_11[thread_num] += H_1_mahalanobis * H_1; - Hs_01[thread_num] += H_0_mahalanobis * H_1; - bs_0[thread_num] += H_0.transpose() * mahalanobis_error; - bs_1[thread_num] += H_1.transpose() * mahalanobis_error; + const double error = 0.5 * residual.transpose() * mahalanobis_residual; + *H_00 += H_0_mahalanobis * H_0; + *H_11 += H_1_mahalanobis * H_1; + *H_01 += H_0_mahalanobis * H_1; + *b_0 += H_0.transpose() * mahalanobis_residual; + *b_1 += H_1.transpose() * mahalanobis_residual; + + return error; + }; + + double error = 0.0; + gtsam::Matrix6 H_00; + gtsam::Matrix6 H_01; + gtsam::Matrix6 H_11; + gtsam::Vector6 b_0; + gtsam::Vector6 b_1; + + if (is_omp_default()) { + error = scan_matching_reduce_omp(perpoint_task, frame::size(*this->source), this->num_threads, &H_00, &H_11, &H_01, &b_0, &b_1); + } else { + error = scan_matching_reduce_tbb(perpoint_task, frame::size(*this->source), &H_00, &H_11, &H_01, &b_0, &b_1); } - for (int i = 1; i < Hs_00.size(); i++) { - Hs_00[0] += Hs_00[i]; - Hs_11[0] += Hs_11[i]; - Hs_01[0] += Hs_01[i]; - bs_0[0] += bs_0[i]; - bs_1[0] += bs_1[i]; - } - - auto factor = gtsam::HessianFactor::shared_ptr( - new gtsam::HessianFactor(this->keys_[0], this->keys_[1], Hs_00[0], Hs_01[0], -bs_0[0], Hs_11[0], -bs_1[0], sum_errors)); + auto factor = gtsam::HessianFactor::shared_ptr(new gtsam::HessianFactor(this->keys_[0], this->keys_[1], H_00, H_01, -b_0, H_11, -b_1, error)); return factor; } @@ -149,8 +157,7 @@ void IntegratedCT_GICPFactor_::update_correspondences( this->correspondences.resize(frame::size(*this->source)); this->mahalanobis.resize(frame::size(*this->source)); -#pragma omp parallel for schedule(guided, 8) num_threads(this->num_threads) - for (int i = 0; i < frame::size(*this->source); i++) { + const auto perpoint_task = [&](int i) { const int time_index = this->time_indices[i]; const Eigen::Matrix4d pose = this->source_poses[time_index].matrix(); @@ -175,6 +182,24 @@ void IntegratedCT_GICPFactor_::update_correspondences( mahalanobis[i].setZero(); mahalanobis[i].block<3, 3>(0, 0) = RCR.block<3, 3>(0, 0).inverse(); } + }; + + if (is_omp_default()) { +#pragma omp parallel for num_threads(this->num_threads) schedule(guided, 8) + for (int i = 0; i < frame::size(*this->source); i++) { + perpoint_task(i); + } + } else { +#ifdef GTSAM_POINTS_TBB + tbb::parallel_for(tbb::blocked_range(0, frame::size(*this->source), 8), [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); i++) { + perpoint_task(i); + } + }); +#else + std::cerr << "error: TBB is not available" << std::endl; + abort(); +#endif } } } // namespace gtsam_points \ No newline at end of file diff --git a/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp index 13f5d39..5e64a9a 100644 --- a/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_ct_icp_factor_impl.hpp @@ -5,6 +5,12 @@ #include #include +#include +#include + +#ifdef GTSAM_POINTS_TBB +#include +#endif namespace gtsam_points { @@ -72,11 +78,16 @@ double IntegratedCT_ICPFactor_::error(const gtsam::Val update_correspondences(); } - double sum_errors = 0.0; - for (int i = 0; i < frame::size(*source); i++) { + const auto perpoint_task = [&]( + int i, + Eigen::Matrix* H_target, + Eigen::Matrix* H_source, + Eigen::Matrix* H_target_source, + Eigen::Matrix* b_target, + Eigen::Matrix* b_source) { const long target_index = correspondences[i]; if (target_index < 0) { - continue; + return 0.0; } const int time_index = time_indices[i]; @@ -90,10 +101,14 @@ double IntegratedCT_ICPFactor_::error(const gtsam::Val gtsam::Point3 residual = transed_source_pt - target_pt.template head<3>(); double error = gtsam::dot(residual, target_normal.template head<3>()); - sum_errors += 0.5 * error * error; - } + return 0.5 * error * error; + }; - return sum_errors; + if (is_omp_default()) { + return scan_matching_reduce_omp(perpoint_task, frame::size(*this->source), this->num_threads, nullptr, nullptr, nullptr, nullptr, nullptr); + } else { + return scan_matching_reduce_tbb(perpoint_task, frame::size(*this->source), nullptr, nullptr, nullptr, nullptr, nullptr); + } } template @@ -106,17 +121,16 @@ boost::shared_ptr IntegratedCT_ICPFactor_* H_00, + Eigen::Matrix* H_11, + Eigen::Matrix* H_01, + Eigen::Matrix* b_0, + Eigen::Matrix* b_1) { const long target_index = correspondences[i]; if (target_index < 0) { - continue; + return 0.0; } const int time_index = time_indices[i]; @@ -141,15 +155,29 @@ boost::shared_ptr IntegratedCT_ICPFactor_source), this->num_threads, &H_00, &H_11, &H_01, &b_0, &b_1); + } else { + error = scan_matching_reduce_tbb(perpoint_task, frame::size(*this->source), &H_00, &H_11, &H_01, &b_0, &b_1); } - auto factor = gtsam::HessianFactor::shared_ptr(new gtsam::HessianFactor(keys_[0], keys_[1], H_00, H_01, -b_0, H_11, -b_1, sum_errors)); + auto factor = gtsam::HessianFactor::shared_ptr(new gtsam::HessianFactor(this->keys_[0], this->keys_[1], H_00, H_01, -b_0, H_11, -b_1, error)); return factor; } @@ -188,7 +216,7 @@ template void IntegratedCT_ICPFactor_::update_correspondences() const { correspondences.resize(frame::size(*source)); - for (int i = 0; i < frame::size(*source); i++) { + const auto perpoint_task = [&](int i) { const int time_index = time_indices[i]; const auto& pt = frame::point(*source, i); @@ -203,6 +231,24 @@ void IntegratedCT_ICPFactor_::update_correspondences() } else { correspondences[i] = k_index; } + }; + + if (is_omp_default()) { +#pragma omp parallel for num_threads(this->num_threads) schedule(guided, 8) + for (int i = 0; i < frame::size(*this->source); i++) { + perpoint_task(i); + } + } else { +#ifdef GTSAM_POINTS_TBB + tbb::parallel_for(tbb::blocked_range(0, frame::size(*this->source), 8), [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); i++) { + perpoint_task(i); + } + }); +#else + std::cerr << "error: TBB is not available" << std::endl; + abort(); +#endif } } diff --git a/src/test/test_continuous_time.cpp b/src/test/test_continuous_time.cpp index 264eabe..f3c1801 100644 --- a/src/test/test_continuous_time.cpp +++ b/src/test/test_continuous_time.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,7 @@ TEST_F(ContinuousTimeFactorsTestBase, LoadCheck) { ASSERT_EQ(deskewed_target_frames.size(), 3) << "Failed to load target frames"; } -struct ContinuousTimeFactorTest : public ContinuousTimeFactorsTestBase, public testing::WithParamInterface { +struct ContinuousTimeFactorTest : public ContinuousTimeFactorsTestBase, public testing::WithParamInterface> { public: double pointcloud_distance(const gtsam_points::PointCloud::ConstPtr& frame1, const gtsam_points::PointCloud::ConstPtr& frame2) { gtsam_points::KdTree tree(frame2->points, frame2->size()); @@ -72,9 +73,24 @@ struct ContinuousTimeFactorTest : public ContinuousTimeFactorsTestBase, public t } }; -INSTANTIATE_TEST_SUITE_P(gtsam_points, ContinuousTimeFactorTest, testing::Values("CTICP", "CTGICP"), [](const auto& info) { return info.param; }); +INSTANTIATE_TEST_SUITE_P( + gtsam_points, + ContinuousTimeFactorTest, + testing::Combine(testing::Values("CTICP", "CTGICP"), testing::Values("NONE", "OMP", "TBB")), + [](const auto& info) { return std::get<0>(info.param) + "_" + std::get<1>(info.param); }); TEST_P(ContinuousTimeFactorTest, AlignmentTest) { + const auto param = GetParam(); + const std::string method = std::get<0>(param); + const std::string parallelism = std::get<1>(param); + const int num_threads = parallelism == "OMP" ? 2 : 1; + + if (parallelism == "TBB") { + gtsam_points::set_tbb_as_default(); + } else { + gtsam_points::set_omp_as_default(); + } + for (int i = 0; i < 3; i++) { gtsam::Values values; values.insert(0, gtsam::Pose3::Identity()); @@ -84,10 +100,14 @@ TEST_P(ContinuousTimeFactorTest, AlignmentTest) { const auto source = raw_source_frames[i]; gtsam_points::IntegratedCT_ICPFactor::shared_ptr factor; - if (GetParam() == "CTICP") { - factor.reset(new gtsam_points::IntegratedCT_ICPFactor(0, 1, target, source)); - } else if (GetParam() == "CTGICP") { - factor.reset(new gtsam_points::IntegratedCT_GICPFactor(0, 1, target, source)); + if (method == "CTICP") { + auto f = gtsam::make_shared(0, 1, target, source); + f->set_num_threads(num_threads); + factor = f; + } else if (method == "CTGICP") { + auto f = gtsam::make_shared(0, 1, target, source); + f->set_num_threads(num_threads); + factor = f; } gtsam::NonlinearFactorGraph graph;