Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Real to complex #21

Merged
merged 13 commits into from
Aug 11, 2021
65 changes: 64 additions & 1 deletion include/boost/math/fft/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,17 +369,28 @@

std::reverse(prime_factors.begin(), prime_factors.begin()+nfactors);

//auto show = [&] (const ComplexType* beg, const ComplexType* end)
//{
// for(;beg!=end;++beg)
// {
// std::cout << *beg << ", ";
// }
// std::cout << "\n";
//};

// butterfly pattern
long len = 1;
for (int ip=0;ip<nfactors;++ip)
{
//std::cout << "pass " << ip << "\n";
int p = prime_factors[ip];
long len_old = len;
len *= p;

std::vector<ComplexType,allocator_type> tmp(p,alloc);
for (long i = 0; i < n; i += len)
{
//std::cout << " i = " << i << "\n";
for(long k=0;k<len_old;++k)
{
for(long j=0;j<p;++j)
Expand All @@ -399,6 +410,7 @@
for(long j=0;j<p;++j)
out[i+ j*len_old + k] = tmp[j];
}
//show(out+i,out+i+len);
}
}
}
Expand Down Expand Up @@ -489,8 +501,59 @@
}
}

template <class T>
void complex_dft_power2(const T *in_first, const T *in_last, T* out, int sign)
{
/*
Cooley-Tukey mapping, in-place Decimation in Time
*/
const long ptrdiff = static_cast<long>(std::distance(in_first,in_last));
if(ptrdiff <=0 )
return;
const long n = lower_bound_power2(ptrdiff);

if(in_first!=out)
std::copy(in_first,in_last,out);

if (n == 1)
return;


// Gold-Rader bit-reversal algorithm.
for(int i=0,j=0;i<n-1;++i)
{
if(i<j)
std::swap(out[i],out[j]);
for(int k=n>>1;!( (j^=k)&k );k>>=1);
}


for (int len = 2, k = 1; len <= n; len <<= 1, ++k)
{
for (int i = 0; i < n; i += len)
{
{
int j=0;
T* u = out + i + j, *v = out + i + j + len / 2;
T Bu = *u, Bv = *v;
*u = Bu + Bv;
*v = Bu - Bv;
}
for (int j = 1; j < len / 2; ++j)
{
T cs{complex_root_of_unity<T>(1<<k,j*sign)};

T* u = out + i + j, *v = out + i + j + len / 2;
T Bu = *u, Bv = *v * cs;
*u = Bu + Bv;
*v = Bu - Bv;
}
}
}
}

template<class complex_value_type>
void complex_dft_power2(
void complex_dft_power2_dif(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the GSL documentation, the rfft can be deduced from the decimation-in-time (DIT) complex fft.
The default complex fft for powers of 2 is implemented as Decimation-in-frequency (DIF), i wrote another complex fft as DIT to test the algorithm before writing the rfft.

const complex_value_type *in_first,
const complex_value_type *in_last,
complex_value_type* out,
Expand Down
91 changes: 91 additions & 0 deletions include/boost/math/fft/bsl_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <boost/math/fft/multiprecision_complex.hpp>

#include <boost/math/fft/algorithms.hpp>
#include <boost/math/fft/real_algorithms.hpp>
#include <boost/math/fft/dft_api.hpp>

namespace boost { namespace math { namespace fft {
Expand Down Expand Up @@ -138,11 +139,101 @@
std::size_t my_size{};
};


Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the bsl rfft backend needs to be implemented

template<class T, class allocator_t = std::allocator<T> >
class bsl_rfft_backend
{
public:
using value_type = T;
using allocator_type = allocator_t;

// the provided root of unity is used instead of exp(-i 2 pi/n)
constexpr bsl_rfft_backend(std::size_t n, const allocator_type& in_alloc = allocator_type{}):
alloc{in_alloc},
my_size{n}
{
}

~bsl_rfft_backend()
{
}

void resize(std::size_t new_size)
{
my_size = new_size;
}

constexpr std::size_t size() const { return my_size; }
constexpr std::size_t unique_complex_size() const { return my_size/2 + 1;}

void real_to_halfcomplex(const value_type* in, value_type* out) const
{
const long N = static_cast<long>(size());
// select the implementation according to the DFT size
switch(N)
{
case 0:
return;
case 1:
out[0]=in[0];
return;
case 2:
detail::real_dft_2(in,out,1);
return;
}
if( detail::is_power2(N))
{
detail::real_dft_power2(in,in+N,out,1);
}else
{
detail::real_dft_composite(in,in+N,out,1,alloc);
}
//if(detail::is_prime(N))
//{
// detail::real_dft_prime_rader(in,in+N,out,sign,alloc);
//}
}
void halfcomplex_to_real(const value_type* in, value_type* out) const
{
const long N = static_cast<long>(size());
// select the implementation according to the DFT size
switch(N)
{
case 0:
return;
case 1:
out[0]=in[0];
return;
case 2:
detail::real_inverse_dft_2(in,out,1);
return;
}
if( detail::is_power2(N))
{
detail::real_inverse_dft_power2(in,in+N,out,1);
}else
{
//detail::real_inverse_dft_composite(in,in+N,out,1,alloc);
}
//if(detail::is_prime(N))
//{
// detail::real_inverse_dft_prime_rader(in,in+N,out,sign,alloc);
//}
}

private:
allocator_type alloc;
std::size_t my_size{};
};

} // namespace detail

template<class RingType = std::complex<double>, class Allocator_t = std::allocator<RingType> >
using bsl_dft = detail::complex_dft<detail::bsl_backend,RingType,Allocator_t>;

template<class T = double, class Allocator_t = std::allocator<T> >
using bsl_rfft = detail::real_dft<detail::bsl_rfft_backend,T,Allocator_t>;

template<class RingType = std::complex<double>, class Allocator_t = std::allocator<RingType> >
using bsl_algebraic_dft = detail::algebraic_dft<detail::bsl_backend,RingType,Allocator_t>;

Expand Down
20 changes: 3 additions & 17 deletions include/boost/math/fft/fftw_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,28 +574,14 @@
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm that we use for rfft for powers of 2, comes naturally in-place if we use the halfcomplex representation used by FFTW.

out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[j] = tmp[i];
if(j+1<N)
out[j+1] = tmp[N-i];
}
for(unsigned int i=1,j=N-1;i<j;++i,--j)
out[j] = -out[j];
}
void unpack_halfcomplex(real_value_type* out) const
// precondition:
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[i] = tmp[j];
if(j+1<N)
out[N-i] = tmp[j+1];
}
pack_halfcomplex(out);
}

public:
Expand Down
30 changes: 30 additions & 0 deletions include/boost/math/fft/gsl_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,34 @@ namespace fft { namespace detail {
gsl_fft_halfcomplex_wavetable *halfcomplex_wtable;
gsl_fft_real_workspace *real_wspace;

void pack_halfcomplex(real_value_type* out) const
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm that we use for rfft for powers of 2, comes naturally in-place if we use the halfcomplex representation used by FFTW. Hence we have to pack and unpack the halfcomplex returned by GSL to make it compatible with our representation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, I appreciate your comments about what's going on in the code.

It's a bit unrelated to this comment, but I noticed that we could replace 2*boost::math::constants::pi<RealType>() with boost::math::constants::two_pi<RealType>(). This could eliminate 1 ULP of error.

// precondition:
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[j] = tmp[i];
if(j+1<N)
out[j+1] = -tmp[N-i];
}
}
void unpack_halfcomplex(real_value_type* out) const
// precondition:
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[i] = tmp[j];
if(j+1<N)
out[N-i] = -tmp[j+1];
}
}
template<plan_type p>
void execute(const real_value_type* in,
real_value_type* out,
Expand All @@ -189,6 +217,7 @@ namespace fft { namespace detail {
}
gsl_fft_real_transform(
out,1, N, real_wtable, real_wspace);
unpack_halfcomplex(out);
}
template<plan_type p>
void execute(const real_value_type* in,
Expand All @@ -203,6 +232,7 @@ namespace fft { namespace detail {
// undefined-behavior
std::copy(in,in+N,out);
}
pack_halfcomplex(out);
gsl_fft_halfcomplex_transform(
out,1, N, halfcomplex_wtable, real_wspace);
}
Expand Down
Loading