Skip to content

Commit

Permalink
[Trifocal+P2Pt] micro improvements to code
Browse files Browse the repository at this point in the history
  • Loading branch information
rfabbri committed Nov 14, 2023
1 parent 5db44e8 commit 2ebaa3c
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 17 deletions.
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -394,11 +394,25 @@ from, specially under Linux.
The best way is with kcachegrind + valgrind, by far.
See [https://www.blogger.com/comment.g?blogID=7395958&postID=116062684092668856&bpli=1&pli=1]

In any system without valgrind or kcachegrind (eg, Macs), the easiest way is with gprof
If no valgrind or kcachegrind is available (eg, some Macs), the easiest way is with gprof

Expect your program to take very very long - so reduce the problem / iterations
Expect your program to take very very long - so maybe reduce the problem / iterations
before running.

Mac OS:
Follow these intructions to install valgrind.
https://github.com/LouisBrunner/valgrind-macos

Install qcachegrind
brew install qcachegrind

Use
valgrind --tool=callgrind minus-chicago -g

kcachegrind (or qcachegrind)



#### GPerftools
https://developer.ridgerun.com/wiki/index.php/Profiling_with_GPerfTools
https://github.com/gperftools/gperftools
Expand Down
6 changes: 3 additions & 3 deletions minus/chicago14a-lsolve.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ lsolve(
// rowsj
__builtin_prefetch(m.col(k).data()+9);
const unsigned char rrows = rows-k-1; unsigned char row_of_biggest_in_col = k;
F biggest_in_corner = std::norm(m(k,k));
F biggest_in_corner = std::norm(m(k,k))*1e3;
for (unsigned j=rows-1; j != k; --j) { // todo: no need to go beyond row 10, Hxt rows 11 12 and 13 are fixed
F tmp;
if (unlikely((tmp = std::norm(m(j,k))) > biggest_in_corner*1e3)) {
biggest_in_corner = tmp; row_of_biggest_in_col = j;
if (unlikely((tmp = std::norm(m(j,k))) > biggest_in_corner)) {
biggest_in_corner = tmp*1e3; row_of_biggest_in_col = j;
break;
}
}
Expand Down
24 changes: 12 additions & 12 deletions minus/minus.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ using namespace Eigen; // only used for linear solve

template <problem P, typename F>
__attribute__((always_inline)) inline void
memoize_Hxt(C<F> __restrict *block, C<F> * __restrict memo /* constants */)
memoize_Hxt(C<F> __restrict *block/*, C<F> * __restrict memo*/ /* constants */)
{
C<F> *const y = reinterpret_cast<C<F> *> (__builtin_assume_aligned(block,64));
C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));
// C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));

y[11]=y[13]=y[25]=y[27]=y[39]=y[41]=y[53]=y[55]=y[67]=
y[68]=y[81]=y[82]=y[95]=y[96]=y[109]=y[110]=y[124]=
Expand All @@ -59,10 +59,10 @@ memoize_Hxt(C<F> __restrict *block, C<F> * __restrict memo /* constants */)

template <problem P, typename F>
__attribute__((always_inline)) inline void
memoize_HxH(C<F> __restrict *block, C<F> * __restrict memo /* constants */)
memoize_HxH(C<F> __restrict *block/*, C<F> * __restrict memo*/ /* constants */)
{
C<F> *const y = reinterpret_cast<C<F> *> (__builtin_assume_aligned(block,64));
C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));
// C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));

y[11]=y[13]=y[25]=y[27]=y[39]=y[41]=y[53]=y[55]=y[67]=
y[68]=y[81]=y[82]=y[95]=y[96]=y[109]=y[110]=y[124]=
Expand Down Expand Up @@ -119,8 +119,8 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
static constexpr F the_smallest_number = 1e-13; // XXX BENCHMARK THIS
typedef minus_array<f::nve,F> v;

alignas(64) C<F> ycHxt[16];
alignas(64) C<F> ycHxH[13];
// alignas(64) C<F> ycHxt[16];
// alignas(64) C<F> ycHxH[13];
// memoization_init() :

// C<F> previous[13];
Expand Down Expand Up @@ -156,8 +156,8 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
v::fcopy(x0t0, xt);

// dx1
evaluate_Hxt_constants(xt, params, ycHxt);
memoize_Hxt<P,F>(Hxt, ycHxt);
// evaluate_Hxt_constants(xt, params, ycHxt);
memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
evaluate_Hxt(xt, params, Hxt); // Outputs Hxt
// dx4_eigen = lu.compute(AA).solve(bb);
lsolve<P,F>(AA, dx4);
Expand All @@ -171,7 +171,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
v::multiply_scalar_to_self(dx4, 2.);
xt[f::nve] += one_half_dt; // t0+.5dt
evaluate_Hxt(xt, params, Hxt);
memoize_Hxt<P,F>(Hxt, ycHxt);
memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
lsolve<P,F>(AA, dxi);

// dx3
Expand All @@ -181,7 +181,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
v::multiply_scalar_to_self(dxi, 4.);
v::add_to_self(dx4, dxi);
evaluate_Hxt(xt, params, Hxt);
memoize_Hxt<P,F>(Hxt, ycHxt);
memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
lsolve<P,F>(AA, dxi);

// dx4
Expand All @@ -192,7 +192,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
v::add_to_self(dx4, dxi);
xt[f::nve] = *t0 + *dt; // t0+dt
evaluate_Hxt(xt, params, Hxt);
memoize_Hxt<P,F>(Hxt, ycHxt);
memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
lsolve<P,F>(AA, dxi);
v::multiply_scalar_to_self(dxi, *dt);
v::add_to_self(dx4, dxi);
Expand Down Expand Up @@ -233,7 +233,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
do {
++n_corr_steps;
evaluate_HxH(x1t1, params, HxH);
memoize_HxH<P,F>(HxH, ycHxH);
memoize_HxH<P,F>(HxH);//, ycHxH);
lsolve<P,F>(AA, dx);
v::add_to_self(x1t1, dx);
is_successful = v::norm2(dx) < s.epsilon2_ * v::norm2(x1t1); // |dx|^2/|x1|^2 < eps2
Expand Down

0 comments on commit 2ebaa3c

Please sign in to comment.