BoostGSoC21 · ckormanyos · Aug 11, 2021 · Aug 9, 2021 · Aug 9, 2021 · Aug 9, 2021
diff --git a/include/boost/math/fft/algorithms.hpp b/include/boost/math/fft/algorithms.hpp
@@ -369,17 +369,28 @@
 
     std::reverse(prime_factors.begin(), prime_factors.begin()+nfactors);
 
+    //auto show = [&] (const ComplexType* beg, const ComplexType* end)
+    //{
+    //  for(;beg!=end;++beg)
+    //  {
+    //    std::cout << *beg << ", ";
+    //  }
+    //  std::cout << "\n";
+    //};
+
     // butterfly pattern
     long len = 1;
     for (int ip=0;ip<nfactors;++ip)
     {
+      //std::cout << "pass " << ip << "\n";
       int p = prime_factors[ip];
       long len_old = len;
       len *= p;
 
       std::vector<ComplexType,allocator_type> tmp(p,alloc);
       for (long i = 0; i < n; i += len)
       {
+        //std::cout << "    i = " << i << "\n";
         for(long k=0;k<len_old;++k)
         {
           for(long j=0;j<p;++j)
@@ -399,6 +410,7 @@
           for(long j=0;j<p;++j)
             out[i+ j*len_old + k] = tmp[j];
         }
+        //show(out+i,out+i+len);
       }
     }
   }
@@ -489,8 +501,59 @@
     }
   }
 
+  template <class T>
+  void complex_dft_power2(const T *in_first, const T *in_last, T* out, int sign)
+  {
+    /*
+      Cooley-Tukey mapping, in-place Decimation in Time 
+    */
+    const long ptrdiff = static_cast<long>(std::distance(in_first,in_last));
+    if(ptrdiff <=0 )
+      return;
+    const long n = lower_bound_power2(ptrdiff);
+
+    if(in_first!=out)
+      std::copy(in_first,in_last,out);
+
+    if (n == 1)
+        return;
+
+
+    // Gold-Rader bit-reversal algorithm.
+    for(int i=0,j=0;i<n-1;++i)
+    { 
+      if(i<j)
+        std::swap(out[i],out[j]);
+      for(int k=n>>1;!( (j^=k)&k );k>>=1);
+    }
+
+
+    for (int len = 2, k = 1; len <= n; len <<= 1, ++k)
+    {
+      for (int i = 0; i < n; i += len)
+      {
+        {
+          int j=0;
+          T* u = out + i + j, *v = out + i + j + len / 2;
+          T Bu = *u, Bv = *v;
+          *u = Bu + Bv;
+          *v = Bu - Bv;
+        }
+        for (int j = 1; j < len / 2; ++j)
+        {
+          T cs{complex_root_of_unity<T>(1<<k,j*sign)};
+
+          T* u = out + i + j, *v = out + i + j + len / 2;
+          T Bu = *u, Bv = *v * cs;
+          *u = Bu + Bv;
+          *v = Bu - Bv;
+        }
+      }
+    }
+  }
+
   template<class complex_value_type>
-  void complex_dft_power2(
+  void complex_dft_power2_dif(
     const complex_value_type *in_first, 
     const complex_value_type *in_last, 
     complex_value_type* out, 

diff --git a/include/boost/math/fft/bsl_backend.hpp b/include/boost/math/fft/bsl_backend.hpp
@@ -15,6 +15,7 @@
   #include <boost/math/fft/multiprecision_complex.hpp>
 
   #include <boost/math/fft/algorithms.hpp>
+  #include <boost/math/fft/real_algorithms.hpp>
   #include <boost/math/fft/dft_api.hpp>
 
   namespace boost { namespace math {  namespace fft { 
@@ -138,11 +139,101 @@
     std::size_t my_size{};
   };
 
+
+  template<class T, class allocator_t = std::allocator<T> >
+  class bsl_rfft_backend
+  {
+  public:
+    using value_type     = T;
+    using allocator_type = allocator_t;
+
+    // the provided root of unity is used instead of exp(-i 2 pi/n)
+    constexpr bsl_rfft_backend(std::size_t n, const allocator_type& in_alloc = allocator_type{}):
+        alloc{in_alloc},
+        my_size{n}
+    { 
+    }
+
+    ~bsl_rfft_backend()
+    {
+    }
+
+    void resize(std::size_t new_size)
+    {
+      my_size = new_size;
+    }
+
+    constexpr std::size_t size() const { return my_size; }
+    constexpr std::size_t unique_complex_size() const { return my_size/2 + 1;}
+
+    void real_to_halfcomplex(const value_type* in, value_type* out) const
+    {
+      const long N = static_cast<long>(size());
+      // select the implementation according to the DFT size
+      switch(N)
+      {
+        case 0:
+          return;
+        case 1:
+          out[0]=in[0];
+          return;
+        case 2:
+          detail::real_dft_2(in,out,1);
+          return;
+      }
+      if( detail::is_power2(N))
+      {
+        detail::real_dft_power2(in,in+N,out,1);
+      }else
+      {
+        detail::real_dft_composite(in,in+N,out,1,alloc);
+      }
+      //if(detail::is_prime(N))
+      //{
+      //  detail::real_dft_prime_rader(in,in+N,out,sign,alloc);
+      //}
+    }
+    void halfcomplex_to_real(const value_type* in, value_type* out) const
+    {
+      const long N = static_cast<long>(size());
+      // select the implementation according to the DFT size
+      switch(N)
+      {
+        case 0:
+          return;
+        case 1:
+          out[0]=in[0];
+          return;
+        case 2:
+          detail::real_inverse_dft_2(in,out,1);
+          return;
+      }
+      if( detail::is_power2(N))
+      { 
+        detail::real_inverse_dft_power2(in,in+N,out,1);
+      }else  
+      {
+        detail::real_inverse_dft_composite(in,in+N,out,1,alloc);
+      }
+      //if(detail::is_prime(N))
+      //{
+      //  detail::real_inverse_dft_prime_rader(in,in+N,out,sign,alloc);
+      //}
+    }
+
+  private:
+    allocator_type alloc;
+    std::size_t my_size{};
+  };
+
   } // namespace detail
 
   template<class RingType = std::complex<double>, class Allocator_t = std::allocator<RingType> >
   using bsl_dft = detail::complex_dft<detail::bsl_backend,RingType,Allocator_t>;
 
+  template<class T = double, class Allocator_t = std::allocator<T> >
+  using bsl_rfft = detail::real_dft<detail::bsl_rfft_backend,T,Allocator_t>;
+
   template<class RingType = std::complex<double>, class Allocator_t = std::allocator<RingType> >
   using bsl_algebraic_dft = detail::algebraic_dft<detail::bsl_backend,RingType,Allocator_t>;
 

diff --git a/include/boost/math/fft/fftw_backend.hpp b/include/boost/math/fft/fftw_backend.hpp
@@ -574,28 +574,14 @@
     // -> size(out) >= N
     {
       const std::size_t N = size();
-      vector_t<real_value_type> tmp(out,out+N);
-      out[0]=tmp[0];
-      for(unsigned int i=1,j=1;j<N;++i,j+=2)
-      {
-        out[j] = tmp[i];
-        if(j+1<N)
-          out[j+1] = tmp[N-i];
-      }
+      for(unsigned int i=1,j=N-1;i<j;++i,--j)
+        out[j] = -out[j];
     }
     void unpack_halfcomplex(real_value_type* out) const
     // precondition:
     // -> size(out) >= N
     {
-      const std::size_t N = size();
-      vector_t<real_value_type> tmp(out,out+N);
-      out[0]=tmp[0];
-      for(unsigned int i=1,j=1;j<N;++i,j+=2)
-      {
-        out[i] = tmp[j];
-        if(j+1<N)
-          out[N-i] = tmp[j+1];
-      }
+      pack_halfcomplex(out);  
     }
 
   public:
@@ -638,6 +624,11 @@
       std::copy(in,in+size(),out);
       unpack_halfcomplex(out);
       execute(my_hc2r_plan,my_hc2r_unaligned_plan,out,out);
+
+      const std::size_t N = size();
+      const real_value_type inv_N = real_value_type{1.0}/N;
+      for(unsigned int i=0;i<N;++i)
+        out[i] *= inv_N;
     }
     template<class Complex>
     void real_to_complex(const real_value_type* in, Complex *out)const

diff --git a/include/boost/math/fft/gsl_backend.hpp b/include/boost/math/fft/gsl_backend.hpp
@@ -174,6 +174,34 @@ namespace fft { namespace detail {
       gsl_fft_halfcomplex_wavetable *halfcomplex_wtable;
       gsl_fft_real_workspace        *real_wspace;
 
+      void pack_halfcomplex(real_value_type* out) const
+      // precondition:
+      // -> size(out) >= N
+      {
+        const std::size_t N = size();
+        vector_t<real_value_type> tmp(out,out+N);
+        out[0]=tmp[0];
+        for(unsigned int i=1,j=1;j<N;++i,j+=2)
+        {
+          out[j] = tmp[i];
+          if(j+1<N)
+            out[j+1] = -tmp[N-i];
+        }
+      }
+      void unpack_halfcomplex(real_value_type* out) const
+      // precondition:
+      // -> size(out) >= N
+      {
+        const std::size_t N = size();
+        vector_t<real_value_type> tmp(out,out+N);
+        out[0]=tmp[0];
+        for(unsigned int i=1,j=1;j<N;++i,j+=2)
+        {
+          out[i] = tmp[j];
+          if(j+1<N)
+            out[N-i] = -tmp[j+1];
+        }
+      }
       template<plan_type p>
       void execute(const real_value_type* in, 
                    real_value_type* out,
@@ -189,6 +217,7 @@ namespace fft { namespace detail {
         }
         gsl_fft_real_transform(
           out,1, N, real_wtable, real_wspace);
+        unpack_halfcomplex(out);
       }
       template<plan_type p>
       void execute(const real_value_type* in, 
@@ -203,8 +232,13 @@ namespace fft { namespace detail {
           // undefined-behavior
           std::copy(in,in+N,out);
         }
+        pack_halfcomplex(out);
         gsl_fft_halfcomplex_transform(
           out,1, N, halfcomplex_wtable, real_wspace);
+
+        const real_value_type inv_N = real_value_type{1.0}/N;
+        for(unsigned int i=0;i<N;++i)
+          out[i] *= inv_N;
       }
 
       void free()