Skip to content

Commit

Permalink
[Trifocal+P2Pt] microopt: annotated branch prediction, tiny advantage…
Browse files Browse the repository at this point in the history
… at best
  • Loading branch information
rfabbri committed Nov 14, 2023
1 parent 4427426 commit 5db44e8
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 17 deletions.
1 change: 1 addition & 0 deletions minus/chicago14a-Hxt.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Hxt(const C<F> * __restrict ux, const C<F> * __restrict uparams, C<F> * __restri
const C<F> *params = reinterpret_cast<C<F> *> (__builtin_assume_aligned(uparams,64));
const C<F> *x = reinterpret_cast<C<F> *> (__builtin_assume_aligned(ux,64));
C<F> *y = reinterpret_cast<C<F> *> (__builtin_assume_aligned(uy,64));
__builtin_prefetch(x);

const C<F> &X0 = x[0]; // q0
const C<F> &X1 = x[1]; // q1
Expand Down
7 changes: 4 additions & 3 deletions minus/chicago14a-lsolve.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,19 @@ lsolve(
// specific to m structure of
// having many zeros in last 3
// rowsj
__builtin_prefetch(m.col(k).data()+9);
const unsigned char rrows = rows-k-1; unsigned char row_of_biggest_in_col = k;
F biggest_in_corner = std::norm(m(k,k));
for (unsigned j=rows-1; j != k; --j) { // todo: no need to go beyond row 10, Hxt rows 11 12 and 13 are fixed
F tmp;
if ((tmp = std::norm(m(j,k))) > biggest_in_corner*1e3) {
if (unlikely((tmp = std::norm(m(j,k))) > biggest_in_corner*1e3)) {
biggest_in_corner = tmp; row_of_biggest_in_col = j;
break;
}
}
if (k != row_of_biggest_in_col) m.row(k).swap(m.row(row_of_biggest_in_col));
if (likely(k != row_of_biggest_in_col)) m.row(k).swap(m.row(row_of_biggest_in_col));
m.col(k).tail(rrows) /= m(k,k);
if (k < rows-1)
if (likely(k < rows-1))
m.block(M::f::nve-rrows,M::f::nve-rrows,rrows,rrows).noalias() -= m.col(k).tail(rrows) * m.row(k).segment(M::f::nve-rrows,rrows);
}
x[0] = m(0,14);
Expand Down
31 changes: 17 additions & 14 deletions minus/minus.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
//#include "Eigen-latest/Core"
#include "Eigen/Core"

#define unlikely(expr) __builtin_expect(!!(expr),0)
#define likely(expr) __builtin_expect(!!(expr),1)

namespace MiNuS {

using namespace Eigen; // only used for linear solve
Expand Down Expand Up @@ -132,18 +135,17 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
char predictor_successes = 0;

// track H(x,t) for t in [0,1]
while (t_s->status == PROCESSING
&& 1 - *t0 > the_smallest_number) {
if (t_s->num_steps == s.max_num_steps_) {
while (likely(t_s->status == PROCESSING && 1. - *t0 > the_smallest_number)) {
if (unlikely(t_s->num_steps == s.max_num_steps_)) {
t_s->status = MAX_NUM_STEPS_FAIL; // failed to reach solution in the available step budget
break;
}

if (!end_zone && 1 - *t0 <= s.end_zone_factor_ + the_smallest_number)
if (unlikely(!end_zone && 1. - *t0 <= s.end_zone_factor_ + the_smallest_number))
end_zone = true; // TODO: see if this path coincides with any other path on entry to the end zone
if (end_zone) {
if (*dt > 1 - *t0) *dt = 1 - *t0;
} else if (*dt > 1 - s.end_zone_factor_ - *t0) *dt = 1 - s.end_zone_factor_ - *t0;
if (unlikely(end_zone)) {
if (unlikely(*dt > 1. - *t0)) *dt = 1 - *t0;
} else if (unlikely(*dt > 1. - s.end_zone_factor_ - *t0)) *dt = 1. - s.end_zone_factor_ - *t0;
/// PREDICTOR /// in: x0t0,dt out: dx
/* top-level code for Runge-Kutta-4
dx1 := solveHxTimesDXequalsminusHt(x0,t0);
Expand Down Expand Up @@ -176,7 +178,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
v::multiply_scalar_to_self(dxi, one_half_dt);
v::copy(x0t0, xt);
v::add_to_self(xt, dxi);
v::multiply_scalar_to_self(dxi, 4);
v::multiply_scalar_to_self(dxi, 4.);
v::add_to_self(dx4, dxi);
evaluate_Hxt(xt, params, Hxt);
memoize_Hxt<P,F>(Hxt, ycHxt);
Expand All @@ -186,7 +188,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
v::multiply_scalar_to_self(dxi, *dt);
v::fcopy(x0t0, xt);
v::add_to_self(xt, dxi);
v::multiply_scalar_to_self(dxi, 2);
v::multiply_scalar_to_self(dxi, 2.);
v::add_to_self(dx4, dxi);
xt[f::nve] = *t0 + *dt; // t0+dt
evaluate_Hxt(xt, params, Hxt);
Expand All @@ -202,6 +204,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
// make prediction
v::fcopy(x0t0, x1t1);
v::fadd_to_self(x1t1, dxdt);


/// CORRECTOR ///
char n_corr_steps = 0;
Expand Down Expand Up @@ -234,23 +237,23 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
lsolve<P,F>(AA, dx);
v::add_to_self(x1t1, dx);
is_successful = v::norm2(dx) < s.epsilon2_ * v::norm2(x1t1); // |dx|^2/|x1|^2 < eps2
} while (!is_successful && n_corr_steps < s.max_corr_steps_);
} while (likely(!is_successful && n_corr_steps < s.max_corr_steps_));

if (!is_successful) { // predictor failure
if (unlikely(!is_successful)) { // predictor failure
predictor_successes = 0;
*dt *= s.dt_decrease_factor_;
if (*dt < s.min_dt_) t_s->status = MIN_STEP_FAILED; // slight difference to SLP-imp.hpp:612
if (unlikely(*dt < s.min_dt_)) t_s->status = MIN_STEP_FAILED; // slight difference to SLP-imp.hpp:612
} else { // predictor success
++predictor_successes;
// std::swap(x1t1,x0t0);
// x0 = x0t0; t0 = (F *) (x0t0 + f::nve); xt = x1t1;
v::fcopy(x1t1, x0t0);
if (predictor_successes >= s.num_successes_before_increase_) {
if (unlikely(predictor_successes >= s.num_successes_before_increase_)) {
predictor_successes = 0;
*dt *= s.dt_increase_factor_;
}
}
if (v::norm2(x0) > s.infinity_threshold2_)
if (unlikely(v::norm2(x0) > s.infinity_threshold2_))
t_s->status = INFINITY_FAILED;
++t_s->num_steps;
} // while (t loop)
Expand Down

0 comments on commit 5db44e8

Please sign in to comment.