Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Real to complex #21

Merged
merged 13 commits into from
Aug 11, 2021
65 changes: 64 additions & 1 deletion include/boost/math/fft/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,17 +369,28 @@

std::reverse(prime_factors.begin(), prime_factors.begin()+nfactors);

//auto show = [&] (const ComplexType* beg, const ComplexType* end)
//{
// for(;beg!=end;++beg)
// {
// std::cout << *beg << ", ";
// }
// std::cout << "\n";
//};

// butterfly pattern
long len = 1;
for (int ip=0;ip<nfactors;++ip)
{
//std::cout << "pass " << ip << "\n";
int p = prime_factors[ip];
long len_old = len;
len *= p;

std::vector<ComplexType,allocator_type> tmp(p,alloc);
for (long i = 0; i < n; i += len)
{
//std::cout << " i = " << i << "\n";
for(long k=0;k<len_old;++k)
{
for(long j=0;j<p;++j)
Expand All @@ -399,6 +410,7 @@
for(long j=0;j<p;++j)
out[i+ j*len_old + k] = tmp[j];
}
//show(out+i,out+i+len);
}
}
}
Expand Down Expand Up @@ -489,8 +501,59 @@
}
}

template <class T>
void complex_dft_power2(const T *in_first, const T *in_last, T* out, int sign)
{
/*
Cooley-Tukey mapping, in-place Decimation in Time
*/
const long ptrdiff = static_cast<long>(std::distance(in_first,in_last));
if(ptrdiff <=0 )
return;
const long n = lower_bound_power2(ptrdiff);

if(in_first!=out)
std::copy(in_first,in_last,out);

if (n == 1)
return;


// Gold-Rader bit-reversal algorithm.
for(int i=0,j=0;i<n-1;++i)
{
if(i<j)
std::swap(out[i],out[j]);
for(int k=n>>1;!( (j^=k)&k );k>>=1);
}


for (int len = 2, k = 1; len <= n; len <<= 1, ++k)
{
for (int i = 0; i < n; i += len)
{
{
int j=0;
T* u = out + i + j, *v = out + i + j + len / 2;
T Bu = *u, Bv = *v;
*u = Bu + Bv;
*v = Bu - Bv;
}
for (int j = 1; j < len / 2; ++j)
{
T cs{complex_root_of_unity<T>(1<<k,j*sign)};

T* u = out + i + j, *v = out + i + j + len / 2;
T Bu = *u, Bv = *v * cs;
*u = Bu + Bv;
*v = Bu - Bv;
}
}
}
}

template<class complex_value_type>
void complex_dft_power2(
void complex_dft_power2_dif(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the GSL documentation, the rfft can be deduced from the decimation-in-time (DIT) complex fft.
The default complex fft for powers of 2 is implemented as Decimation-in-frequency (DIF), i wrote another complex fft as DIT to test the algorithm before writing the rfft.

const complex_value_type *in_first,
const complex_value_type *in_last,
complex_value_type* out,
Expand Down
91 changes: 91 additions & 0 deletions include/boost/math/fft/bsl_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <boost/math/fft/multiprecision_complex.hpp>

#include <boost/math/fft/algorithms.hpp>
#include <boost/math/fft/real_algorithms.hpp>
#include <boost/math/fft/dft_api.hpp>

namespace boost { namespace math { namespace fft {
Expand Down Expand Up @@ -138,11 +139,101 @@
std::size_t my_size{};
};


Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the bsl rfft backend needs to be implemented

template<class T, class allocator_t = std::allocator<T> >
class bsl_rfft_backend
{
public:
using value_type = T;
using allocator_type = allocator_t;

// the provided root of unity is used instead of exp(-i 2 pi/n)
constexpr bsl_rfft_backend(std::size_t n, const allocator_type& in_alloc = allocator_type{}):
alloc{in_alloc},
my_size{n}
{
}

~bsl_rfft_backend()
{
}

void resize(std::size_t new_size)
{
my_size = new_size;
}

constexpr std::size_t size() const { return my_size; }
constexpr std::size_t unique_complex_size() const { return my_size/2 + 1;}

void real_to_halfcomplex(const value_type* in, value_type* out) const
{
const long N = static_cast<long>(size());
// select the implementation according to the DFT size
switch(N)
{
case 0:
return;
case 1:
out[0]=in[0];
return;
case 2:
detail::real_dft_2(in,out,1);
return;
}
if( detail::is_power2(N))
{
detail::real_dft_power2(in,in+N,out,1);
}else
{
detail::real_dft_composite(in,in+N,out,1,alloc);
}
//if(detail::is_prime(N))
//{
// detail::real_dft_prime_rader(in,in+N,out,sign,alloc);
//}
}
void halfcomplex_to_real(const value_type* in, value_type* out) const
{
const long N = static_cast<long>(size());
// select the implementation according to the DFT size
switch(N)
{
case 0:
return;
case 1:
out[0]=in[0];
return;
case 2:
detail::real_inverse_dft_2(in,out,1);
return;
}
if( detail::is_power2(N))
{
detail::real_inverse_dft_power2(in,in+N,out,1);
}else
{
detail::real_inverse_dft_composite(in,in+N,out,1,alloc);
}
//if(detail::is_prime(N))
//{
// detail::real_inverse_dft_prime_rader(in,in+N,out,sign,alloc);
//}
}

private:
allocator_type alloc;
std::size_t my_size{};
};

} // namespace detail

template<class RingType = std::complex<double>, class Allocator_t = std::allocator<RingType> >
using bsl_dft = detail::complex_dft<detail::bsl_backend,RingType,Allocator_t>;

template<class T = double, class Allocator_t = std::allocator<T> >
using bsl_rfft = detail::real_dft<detail::bsl_rfft_backend,T,Allocator_t>;

template<class RingType = std::complex<double>, class Allocator_t = std::allocator<RingType> >
using bsl_algebraic_dft = detail::algebraic_dft<detail::bsl_backend,RingType,Allocator_t>;

Expand Down
25 changes: 8 additions & 17 deletions include/boost/math/fft/fftw_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,28 +574,14 @@
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm that we use for rfft for powers of 2, comes naturally in-place if we use the halfcomplex representation used by FFTW.

out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[j] = tmp[i];
if(j+1<N)
out[j+1] = tmp[N-i];
}
for(unsigned int i=1,j=N-1;i<j;++i,--j)
out[j] = -out[j];
}
void unpack_halfcomplex(real_value_type* out) const
// precondition:
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[i] = tmp[j];
if(j+1<N)
out[N-i] = tmp[j+1];
}
pack_halfcomplex(out);
}

public:
Expand Down Expand Up @@ -638,6 +624,11 @@
std::copy(in,in+size(),out);
unpack_halfcomplex(out);
execute(my_hc2r_plan,my_hc2r_unaligned_plan,out,out);

const std::size_t N = size();
const real_value_type inv_N = real_value_type{1.0}/N;
for(unsigned int i=0;i<N;++i)
out[i] *= inv_N;
Copy link
Collaborator Author

@Lagrang3 Lagrang3 Aug 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when transforming halfcomplex-to-real, both GSL and FFTW, return an array which is a 1/N factor of the original real array. This could be in tune with the fact that the reverse FFT is the inverse of the forward FFT up to a 1/N factor. But the actual implementation of the halfcomplex-to-real (at least in BSL) is the reverse graph of the real-to-halfcomplex and the 1/N factor does not appear. I cannot see a good reason why the halfcomplex-to-real cannot be the inverse linear map of the real-to-halfcomplex. In the BSL this comes naturally. With the GSL and FFTW I need to multiply times 1/N.

UPD. I realized that the halfcomplex-to-real in BSL has multiplications by inverse factors of N all over. So it doesn't actually comes naturally like I previously said. @cosurgi @ckormanyos : if you agree I will change the halfcomplex-to-real to return the inverse of the real-to-halfcomplex times N, just to save some multiplications and be in tune with the FFTW and GSL conventions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Lagrang3 yes, this is a good idea. Better to make sure that all backends produce the same kind of output. To avoid any kind of frustration for the users who have a nice working code, and only decide to switch to a different backend.

Having to put an if somewhere in the user code to multiply by 1/N, depending on the type of backend, would not be good.

}
template<class Complex>
void real_to_complex(const real_value_type* in, Complex *out)const
Expand Down
34 changes: 34 additions & 0 deletions include/boost/math/fft/gsl_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,34 @@ namespace fft { namespace detail {
gsl_fft_halfcomplex_wavetable *halfcomplex_wtable;
gsl_fft_real_workspace *real_wspace;

void pack_halfcomplex(real_value_type* out) const
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm that we use for rfft for powers of 2, comes naturally in-place if we use the halfcomplex representation used by FFTW. Hence we have to pack and unpack the halfcomplex returned by GSL to make it compatible with our representation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, I appreciate your comments about what's going on in the code.

It's a bit unrelated to this comment, but I noticed that we could replace 2*boost::math::constants::pi<RealType>() with boost::math::constants::two_pi<RealType>(). This could eliminate 1 ULP of error.

// precondition:
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[j] = tmp[i];
if(j+1<N)
out[j+1] = -tmp[N-i];
}
}
void unpack_halfcomplex(real_value_type* out) const
// precondition:
// -> size(out) >= N
{
const std::size_t N = size();
vector_t<real_value_type> tmp(out,out+N);
out[0]=tmp[0];
for(unsigned int i=1,j=1;j<N;++i,j+=2)
{
out[i] = tmp[j];
if(j+1<N)
out[N-i] = -tmp[j+1];
}
}
template<plan_type p>
void execute(const real_value_type* in,
real_value_type* out,
Expand All @@ -189,6 +217,7 @@ namespace fft { namespace detail {
}
gsl_fft_real_transform(
out,1, N, real_wtable, real_wspace);
unpack_halfcomplex(out);
}
template<plan_type p>
void execute(const real_value_type* in,
Expand All @@ -203,8 +232,13 @@ namespace fft { namespace detail {
// undefined-behavior
std::copy(in,in+N,out);
}
pack_halfcomplex(out);
gsl_fft_halfcomplex_transform(
out,1, N, halfcomplex_wtable, real_wspace);

const real_value_type inv_N = real_value_type{1.0}/N;
for(unsigned int i=0;i<N;++i)
out[i] *= inv_N;
}

void free()
Expand Down
Loading