[Trifocal+P2Pt] micro improvements to code

rfabbri · Nov 14, 2023 · 2ebaa3c · 2ebaa3c
1 parent 5db44e8
commit 2ebaa3c
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -394,11 +394,25 @@ from, specially under Linux.
 The best way is with kcachegrind + valgrind, by far. 
 See [https://www.blogger.com/comment.g?blogID=7395958&postID=116062684092668856&bpli=1&pli=1]
 
-In any system without valgrind or kcachegrind (eg, Macs), the easiest way is with gprof
+If no valgrind or kcachegrind is available (eg, some Macs), the easiest way is with gprof
 
-Expect your program to take very very long - so reduce the problem / iterations
+Expect your program to take very very long - so maybe reduce the problem / iterations
 before running.
 
+Mac OS:
+Follow these intructions to install valgrind.
+https://github.com/LouisBrunner/valgrind-macos
+
+Install qcachegrind
+brew install qcachegrind
+
+Use
+valgrind --tool=callgrind minus-chicago -g
+
+kcachegrind (or qcachegrind)
+
+
+
 #### GPerftools
 https://developer.ridgerun.com/wiki/index.php/Profiling_with_GPerfTools
 https://github.com/gperftools/gperftools

diff --git a/minus/chicago14a-lsolve.hxx b/minus/chicago14a-lsolve.hxx
@@ -15,11 +15,11 @@ lsolve(
                                                  // rowsj
     __builtin_prefetch(m.col(k).data()+9);
     const unsigned char rrows = rows-k-1; unsigned char row_of_biggest_in_col = k;
-    F biggest_in_corner = std::norm(m(k,k));
+    F biggest_in_corner = std::norm(m(k,k))*1e3;
     for (unsigned j=rows-1; j != k; --j) { // todo: no need to go beyond row 10, Hxt rows 11 12 and 13 are fixed
       F tmp;
-      if (unlikely((tmp = std::norm(m(j,k))) > biggest_in_corner*1e3)) {
-          biggest_in_corner = tmp; row_of_biggest_in_col = j;
+      if (unlikely((tmp = std::norm(m(j,k))) > biggest_in_corner)) {
+          biggest_in_corner = tmp*1e3; row_of_biggest_in_col = j;
           break;
       }
     }

diff --git a/minus/minus.hxx b/minus/minus.hxx
@@ -30,10 +30,10 @@ using namespace Eigen; // only used for linear solve
 
 template <problem P, typename F>
 __attribute__((always_inline)) inline void
-memoize_Hxt(C<F> __restrict *block, C<F> * __restrict memo /* constants */)
+memoize_Hxt(C<F> __restrict *block/*, C<F> * __restrict memo*/ /* constants */)
 {
   C<F> *const y = reinterpret_cast<C<F> *> (__builtin_assume_aligned(block,64));
-  C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));
+//  C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));
 
   y[11]=y[13]=y[25]=y[27]=y[39]=y[41]=y[53]=y[55]=y[67]=
         y[68]=y[81]=y[82]=y[95]=y[96]=y[109]=y[110]=y[124]=
@@ -59,10 +59,10 @@ memoize_Hxt(C<F> __restrict *block, C<F> * __restrict memo /* constants */)
 
 template <problem P, typename F>
 __attribute__((always_inline)) inline void
-memoize_HxH(C<F> __restrict *block, C<F> * __restrict memo /* constants */)
+memoize_HxH(C<F> __restrict *block/*, C<F> * __restrict memo*/ /* constants */)
 {
   C<F> *const y = reinterpret_cast<C<F> *> (__builtin_assume_aligned(block,64));
-  C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));
+//  C<F> *const yc = reinterpret_cast<C<F> *> (__builtin_assume_aligned(memo,64));
 
   y[11]=y[13]=y[25]=y[27]=y[39]=y[41]=y[53]=y[55]=y[67]=
         y[68]=y[81]=y[82]=y[95]=y[96]=y[109]=y[110]=y[124]=
@@ -119,8 +119,8 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
   static constexpr F the_smallest_number = 1e-13; // XXX BENCHMARK THIS
   typedef minus_array<f::nve,F> v;
 
-  alignas(64) C<F> ycHxt[16]; 
-  alignas(64) C<F> ycHxH[13];
+  //  alignas(64) C<F> ycHxt[16]; 
+  //  alignas(64) C<F> ycHxH[13];
   // memoization_init() : 
 
   // C<F> previous[13];
@@ -156,8 +156,8 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
       v::fcopy(x0t0, xt);
 
       // dx1
-      evaluate_Hxt_constants(xt, params, ycHxt);
-      memoize_Hxt<P,F>(Hxt, ycHxt);
+      // evaluate_Hxt_constants(xt, params, ycHxt);
+      memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
       evaluate_Hxt(xt, params, Hxt); // Outputs Hxt
       // dx4_eigen = lu.compute(AA).solve(bb);
       lsolve<P,F>(AA, dx4);
@@ -171,7 +171,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
       v::multiply_scalar_to_self(dx4, 2.);
       xt[f::nve] += one_half_dt;  // t0+.5dt
       evaluate_Hxt(xt, params, Hxt);
-      memoize_Hxt<P,F>(Hxt, ycHxt);
+      memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
       lsolve<P,F>(AA, dxi);
 
       // dx3
@@ -181,7 +181,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
       v::multiply_scalar_to_self(dxi, 4.);
       v::add_to_self(dx4, dxi);
       evaluate_Hxt(xt, params, Hxt);
-      memoize_Hxt<P,F>(Hxt, ycHxt);
+      memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
       lsolve<P,F>(AA, dxi);
 
       // dx4
@@ -192,7 +192,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
       v::add_to_self(dx4, dxi);
       xt[f::nve] = *t0 + *dt;               // t0+dt
       evaluate_Hxt(xt, params, Hxt);
-      memoize_Hxt<P,F>(Hxt, ycHxt);
+      memoize_Hxt<P,F>(Hxt);/*, ycHxt);*/
       lsolve<P,F>(AA, dxi);
       v::multiply_scalar_to_self(dxi, *dt);
       v::add_to_self(dx4, dxi);
@@ -233,7 +233,7 @@ track(const track_settings &s, const C<F> s_sols_u[f::nve*f::nsols], const C<F>
       do {
         ++n_corr_steps;
         evaluate_HxH(x1t1, params, HxH);
-        memoize_HxH<P,F>(HxH, ycHxH);
+        memoize_HxH<P,F>(HxH);//, ycHxH);
         lsolve<P,F>(AA, dx);
         v::add_to_self(x1t1, dx);
         is_successful = v::norm2(dx) < s.epsilon2_ * v::norm2(x1t1); // |dx|^2/|x1|^2 < eps2