Mercurial > octave-libtiff
view liboctave/numeric/oct-fftw.cc @ 30564:796f54d4ddbf stable
update Octave Project Developers copyright for the new year
In files that have the "Octave Project Developers" copyright notice,
update for 2022.
In all .txi and .texi files except gpl.txi and gpl.texi in the
doc/liboctave and doc/interpreter directories, change the copyright
to "Octave Project Developers", the same as used for other source
files. Update copyright notices for 2022 (not done since 2019). For
gpl.txi and gpl.texi, change the copyright notice to be "Free Software
Foundation, Inc." and leave the date at 2007 only because this file
only contains the text of the GPL, not anything created by the Octave
Project Developers.
Add Paul Thomas to contributors.in.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Tue, 28 Dec 2021 18:22:40 -0500 |
parents | eb10871a5882 |
children |
line wrap: on
line source
//////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001-2022 The Octave Project Developers // // See the file COPYRIGHT.md in the top-level directory of this // distribution or <https://octave.org/copyright/>. // // This file is part of Octave. // // Octave is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Octave is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Octave; see the file COPYING. If not, see // <https://www.gnu.org/licenses/>. // //////////////////////////////////////////////////////////////////////// #if defined (HAVE_CONFIG_H) # include "config.h" #endif #if defined (HAVE_FFTW3_H) # include <fftw3.h> #endif #include "lo-error.h" #include "oct-fftw.h" #include "oct-locbuf.h" #include "quit.h" #include "singleton-cleanup.h" #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS) # include "nproc-wrapper.h" #endif namespace octave { #if defined (HAVE_FFTW) fftw_planner *fftw_planner::s_instance = nullptr; // Helper class to create and cache FFTW plans for both 1D and // 2D. This implementation defaults to using FFTW_ESTIMATE to create // the plans, which in theory is suboptimal, but provides quite // reasonable performance in practice. // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3 // will destroy the input and output arrays. We must, therefore, create a // temporary input array with the same size and 16-byte alignment as // the original array when using a different planner strategy. 
// Note that we also use any wisdom that is available, either in a // FFTW3 system wide file or as supplied by the user. // FIXME: if we can ensure 16 byte alignment in Array<T> // (<T> *data) the FFTW3 can use SIMD instructions for further // acceleration. // Note that it is profitable to store the FFTW3 plans, for small FFTs. fftw_planner::fftw_planner (void) : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0), m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1) { m_plan[0] = m_plan[1] = nullptr; m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0; m_simd_align[0] = m_simd_align[1] = false; m_inplace[0] = m_inplace[1] = false; m_n[0] = m_n[1] = dim_vector (); #if defined (HAVE_FFTW3_THREADS) int init_ret = fftw_init_threads (); if (! init_ret) (*current_liboctave_error_handler) ("Error initializing FFTW threads"); // Use number of processors available to the current process // This can be later changed with fftw ("threads", nthreads). m_nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); fftw_plan_with_nthreads (m_nthreads); #endif // If we have a system wide wisdom file, import it. fftw_import_system_wisdom (); } fftw_planner::~fftw_planner (void) { fftw_plan *plan_p; plan_p = reinterpret_cast<fftw_plan *> (&m_rplan); if (*plan_p) fftw_destroy_plan (*plan_p); plan_p = reinterpret_cast<fftw_plan *> (&m_plan[0]); if (*plan_p) fftw_destroy_plan (*plan_p); plan_p = reinterpret_cast<fftw_plan *> (&m_plan[1]); if (*plan_p) fftw_destroy_plan (*plan_p); } bool fftw_planner::instance_ok (void) { bool retval = true; if (! s_instance) { s_instance = new fftw_planner (); singleton_cleanup_list::add (cleanup_instance); } return retval; } void fftw_planner::threads (int nt) { #if defined (HAVE_FFTW3_THREADS) if (instance_ok () && nt != threads ()) { s_instance->m_nthreads = nt; fftw_plan_with_nthreads (nt); // Clear the current plans. 
s_instance->m_rplan = nullptr; s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr; } #else octave_unused_parameter (nt); (*current_liboctave_warning_handler) ("unable to change number of threads without FFTW thread support"); #endif } #define CHECK_SIMD_ALIGNMENT(x) \ (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0) void * fftw_planner::do_create_plan (int dir, const int rank, const dim_vector& dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out) { int which = (dir == FFTW_FORWARD) ? 0 : 1; fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_plan[which]); bool create_new_plan = false; bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); bool ioinplace = (in == out); // Don't create a new plan if we have a non SIMD plan already but // can do SIMD. This prevents endlessly recreating plans if we // change the alignment. if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride || m_r[which] != rank || m_h[which] != howmany || ioinplace != m_inplace[which] || ((ioalign != m_simd_align[which]) ? ! ioalign : false)) create_new_plan = true; else { // We still might not have the same shape of array. for (int i = 0; i < rank; i++) if (dims(i) != m_n[which](i)) { create_new_plan = true; break; } } if (create_new_plan) { m_d[which] = dist; m_s[which] = stride; m_r[which] = rank; m_h[which] = howmany; m_simd_align[which] = ioalign; m_inplace[which] = ioinplace; m_n[which] = dims; // Note reversal of dimensions for column major storage in FFTW. 
octave_idx_type nn = 1; OCTAVE_LOCAL_BUFFER (int, tmp, rank); for (int i = 0, j = rank-1; i < rank; i++, j--) { tmp[i] = dims(j); nn *= dims(j); } int plan_flags = 0; bool plan_destroys_in = true; switch (m_meth) { case UNKNOWN: case ESTIMATE: plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; break; case MEASURE: plan_flags |= FFTW_MEASURE; break; case PATIENT: plan_flags |= FFTW_PATIENT; break; case EXHAUSTIVE: plan_flags |= FFTW_EXHAUSTIVE; break; case HYBRID: if (nn < 8193) plan_flags |= FFTW_MEASURE; else { plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; } break; } if (ioalign) plan_flags &= ~FFTW_UNALIGNED; else plan_flags |= FFTW_UNALIGNED; if (*cur_plan_p) fftw_destroy_plan (*cur_plan_p); if (plan_destroys_in) { // Create matrix with the same size and 16-byte alignment as input OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); itmp = reinterpret_cast<Complex *> (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); *cur_plan_p = fftw_plan_many_dft (rank, tmp, howmany, reinterpret_cast<fftw_complex *> (itmp), nullptr, stride, dist, reinterpret_cast<fftw_complex *> (out), nullptr, stride, dist, dir, plan_flags); } else { *cur_plan_p = fftw_plan_many_dft (rank, tmp, howmany, reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), nullptr, stride, dist, reinterpret_cast<fftw_complex *> (out), nullptr, stride, dist, dir, plan_flags); } if (*cur_plan_p == nullptr) (*current_liboctave_error_handler) ("Error creating FFTW plan"); } return *cur_plan_p; } void * fftw_planner::do_create_plan (const int rank, const dim_vector& dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const double *in, Complex *out) { fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&m_rplan); bool create_new_plan = false; bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); // Don't create a new plan if we have a non SIMD plan already but // can do SIMD. 
This prevents endlessly recreating plans if we // change the alignment. if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false)) create_new_plan = true; else { // We still might not have the same shape of array. for (int i = 0; i < rank; i++) if (dims(i) != m_rn(i)) { create_new_plan = true; break; } } if (create_new_plan) { m_rd = dist; m_rs = stride; m_rr = rank; m_rh = howmany; m_rsimd_align = ioalign; m_rn = dims; // Note reversal of dimensions for column major storage in FFTW. octave_idx_type nn = 1; OCTAVE_LOCAL_BUFFER (int, tmp, rank); for (int i = 0, j = rank-1; i < rank; i++, j--) { tmp[i] = dims(j); nn *= dims(j); } int plan_flags = 0; bool plan_destroys_in = true; switch (m_meth) { case UNKNOWN: case ESTIMATE: plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; break; case MEASURE: plan_flags |= FFTW_MEASURE; break; case PATIENT: plan_flags |= FFTW_PATIENT; break; case EXHAUSTIVE: plan_flags |= FFTW_EXHAUSTIVE; break; case HYBRID: if (nn < 8193) plan_flags |= FFTW_MEASURE; else { plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; } break; } if (ioalign) plan_flags &= ~FFTW_UNALIGNED; else plan_flags |= FFTW_UNALIGNED; if (*cur_plan_p) fftw_destroy_plan (*cur_plan_p); if (plan_destroys_in) { // Create matrix with the same size and 16-byte alignment as input OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); itmp = reinterpret_cast<double *> (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); *cur_plan_p = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, nullptr, stride, dist, reinterpret_cast<fftw_complex *> (out), nullptr, stride, dist, plan_flags); } else { *cur_plan_p = fftw_plan_many_dft_r2c (rank, tmp, howmany, (const_cast<double *> (in)), nullptr, stride, dist, reinterpret_cast<fftw_complex *> (out), nullptr, stride, dist, plan_flags); } if (*cur_plan_p == nullptr) (*current_liboctave_error_handler) 
("Error creating FFTW plan"); } return *cur_plan_p; } fftw_planner::FftwMethod fftw_planner::do_method (void) { return m_meth; } fftw_planner::FftwMethod fftw_planner::do_method (FftwMethod _meth) { FftwMethod ret = m_meth; if (_meth == ESTIMATE || _meth == MEASURE || _meth == PATIENT || _meth == EXHAUSTIVE || _meth == HYBRID) { if (m_meth != _meth) { m_meth = _meth; if (m_rplan) fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_rplan)); if (m_plan[0]) fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[0])); if (m_plan[1]) fftw_destroy_plan (reinterpret_cast<fftw_plan> (m_plan[1])); m_rplan = m_plan[0] = m_plan[1] = nullptr; } } else ret = UNKNOWN; return ret; } float_fftw_planner *float_fftw_planner::s_instance = nullptr; float_fftw_planner::float_fftw_planner (void) : m_meth (ESTIMATE), m_rplan (nullptr), m_rd (0), m_rs (0), m_rr (0), m_rh (0), m_rn (), m_rsimd_align (false), m_nthreads (1) { m_plan[0] = m_plan[1] = nullptr; m_d[0] = m_d[1] = m_s[0] = m_s[1] = m_r[0] = m_r[1] = m_h[0] = m_h[1] = 0; m_simd_align[0] = m_simd_align[1] = false; m_inplace[0] = m_inplace[1] = false; m_n[0] = m_n[1] = dim_vector (); #if defined (HAVE_FFTW3F_THREADS) int init_ret = fftwf_init_threads (); if (! init_ret) (*current_liboctave_error_handler) ("Error initializing FFTW3F threads"); // Use number of processors available to the current process // This can be later changed with fftw ("threads", nthreads). m_nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); fftwf_plan_with_nthreads (m_nthreads); #endif // If we have a system wide wisdom file, import it. 
fftwf_import_system_wisdom (); } float_fftw_planner::~float_fftw_planner (void) { fftwf_plan *plan_p; plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan); if (*plan_p) fftwf_destroy_plan (*plan_p); plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[0]); if (*plan_p) fftwf_destroy_plan (*plan_p); plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[1]); if (*plan_p) fftwf_destroy_plan (*plan_p); } bool float_fftw_planner::instance_ok (void) { bool retval = true; if (! s_instance) { s_instance = new float_fftw_planner (); singleton_cleanup_list::add (cleanup_instance); } return retval; } void float_fftw_planner::threads (int nt) { #if defined (HAVE_FFTW3F_THREADS) if (instance_ok () && nt != threads ()) { s_instance->m_nthreads = nt; fftwf_plan_with_nthreads (nt); // Clear the current plans. s_instance->m_rplan = nullptr; s_instance->m_plan[0] = s_instance->m_plan[1] = nullptr; } #else octave_unused_parameter (nt); (*current_liboctave_warning_handler) ("unable to change number of threads without FFTW thread support"); #endif } void * float_fftw_planner::do_create_plan (int dir, const int rank, const dim_vector& dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out) { int which = (dir == FFTW_FORWARD) ? 0 : 1; fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_plan[which]); bool create_new_plan = false; bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); bool ioinplace = (in == out); // Don't create a new plan if we have a non SIMD plan already but // can do SIMD. This prevents endlessly recreating plans if we // change the alignment. if (m_plan[which] == nullptr || m_d[which] != dist || m_s[which] != stride || m_r[which] != rank || m_h[which] != howmany || ioinplace != m_inplace[which] || ((ioalign != m_simd_align[which]) ? ! ioalign : false)) create_new_plan = true; else { // We still might not have the same shape of array. 
for (int i = 0; i < rank; i++) if (dims(i) != m_n[which](i)) { create_new_plan = true; break; } } if (create_new_plan) { m_d[which] = dist; m_s[which] = stride; m_r[which] = rank; m_h[which] = howmany; m_simd_align[which] = ioalign; m_inplace[which] = ioinplace; m_n[which] = dims; // Note reversal of dimensions for column major storage in FFTW. octave_idx_type nn = 1; OCTAVE_LOCAL_BUFFER (int, tmp, rank); for (int i = 0, j = rank-1; i < rank; i++, j--) { tmp[i] = dims(j); nn *= dims(j); } int plan_flags = 0; bool plan_destroys_in = true; switch (m_meth) { case UNKNOWN: case ESTIMATE: plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; break; case MEASURE: plan_flags |= FFTW_MEASURE; break; case PATIENT: plan_flags |= FFTW_PATIENT; break; case EXHAUSTIVE: plan_flags |= FFTW_EXHAUSTIVE; break; case HYBRID: if (nn < 8193) plan_flags |= FFTW_MEASURE; else { plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; } break; } if (ioalign) plan_flags &= ~FFTW_UNALIGNED; else plan_flags |= FFTW_UNALIGNED; if (*cur_plan_p) fftwf_destroy_plan (*cur_plan_p); if (plan_destroys_in) { // Create matrix with the same size and 16-byte alignment as input OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); itmp = reinterpret_cast<FloatComplex *> (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); *cur_plan_p = fftwf_plan_many_dft (rank, tmp, howmany, reinterpret_cast<fftwf_complex *> (itmp), nullptr, stride, dist, reinterpret_cast<fftwf_complex *> (out), nullptr, stride, dist, dir, plan_flags); } else { *cur_plan_p = fftwf_plan_many_dft (rank, tmp, howmany, reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)), nullptr, stride, dist, reinterpret_cast<fftwf_complex *> (out), nullptr, stride, dist, dir, plan_flags); } if (*cur_plan_p == nullptr) (*current_liboctave_error_handler) ("Error creating FFTW plan"); } return *cur_plan_p; } void * float_fftw_planner::do_create_plan (const int rank, const 
dim_vector& dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const float *in, FloatComplex *out) { fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&m_rplan); bool create_new_plan = false; bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); // Don't create a new plan if we have a non SIMD plan already but // can do SIMD. This prevents endlessly recreating plans if we // change the alignment. if (m_rplan == nullptr || m_rd != dist || m_rs != stride || m_rr != rank || m_rh != howmany || ((ioalign != m_rsimd_align) ? ! ioalign : false)) create_new_plan = true; else { // We still might not have the same shape of array. for (int i = 0; i < rank; i++) if (dims(i) != m_rn(i)) { create_new_plan = true; break; } } if (create_new_plan) { m_rd = dist; m_rs = stride; m_rr = rank; m_rh = howmany; m_rsimd_align = ioalign; m_rn = dims; // Note reversal of dimensions for column major storage in FFTW. octave_idx_type nn = 1; OCTAVE_LOCAL_BUFFER (int, tmp, rank); for (int i = 0, j = rank-1; i < rank; i++, j--) { tmp[i] = dims(j); nn *= dims(j); } int plan_flags = 0; bool plan_destroys_in = true; switch (m_meth) { case UNKNOWN: case ESTIMATE: plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; break; case MEASURE: plan_flags |= FFTW_MEASURE; break; case PATIENT: plan_flags |= FFTW_PATIENT; break; case EXHAUSTIVE: plan_flags |= FFTW_EXHAUSTIVE; break; case HYBRID: if (nn < 8193) plan_flags |= FFTW_MEASURE; else { plan_flags |= FFTW_ESTIMATE; plan_destroys_in = false; } break; } if (ioalign) plan_flags &= ~FFTW_UNALIGNED; else plan_flags |= FFTW_UNALIGNED; if (*cur_plan_p) fftwf_destroy_plan (*cur_plan_p); if (plan_destroys_in) { // Create matrix with the same size and 16-byte alignment as input OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); itmp = reinterpret_cast<float *> (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); *cur_plan_p = fftwf_plan_many_dft_r2c (rank, tmp, 
howmany, itmp, nullptr, stride, dist, reinterpret_cast<fftwf_complex *> (out), nullptr, stride, dist, plan_flags); } else { *cur_plan_p = fftwf_plan_many_dft_r2c (rank, tmp, howmany, (const_cast<float *> (in)), nullptr, stride, dist, reinterpret_cast<fftwf_complex *> (out), nullptr, stride, dist, plan_flags); } if (*cur_plan_p == nullptr) (*current_liboctave_error_handler) ("Error creating FFTW plan"); } return *cur_plan_p; } float_fftw_planner::FftwMethod float_fftw_planner::do_method (void) { return m_meth; } float_fftw_planner::FftwMethod float_fftw_planner::do_method (FftwMethod _meth) { FftwMethod ret = m_meth; if (_meth == ESTIMATE || _meth == MEASURE || _meth == PATIENT || _meth == EXHAUSTIVE || _meth == HYBRID) { if (m_meth != _meth) { m_meth = _meth; if (m_rplan) fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_rplan)); if (m_plan[0]) fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[0])); if (m_plan[1]) fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (m_plan[1])); m_rplan = m_plan[0] = m_plan[1] = nullptr; } } else ret = UNKNOWN; return ret; } template <typename T> static inline void convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc, octave_idx_type stride, octave_idx_type dist) { octave_quit (); // Fill in the missing data. for (std::size_t i = 0; i < nr; i++) for (std::size_t j = nc/2+1; j < nc; j++) out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]); octave_quit (); } template <typename T> static inline void convert_packcomplex_Nd (T *out, const dim_vector& dv) { std::size_t nc = dv(0); std::size_t nr = dv(1); std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1); std::size_t nrp = nr * np; T *ptr1, *ptr2; octave_quit (); // Create space for the missing elements. 
for (std::size_t i = 0; i < nrp; i++) { ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); ptr2 = out + i * nc; for (std::size_t j = 0; j < nc/2+1; j++) *ptr2++ = *ptr1++; } octave_quit (); // Fill in the missing data for the rank = 2 case directly for speed. for (std::size_t i = 0; i < np; i++) { for (std::size_t j = 1; j < nr; j++) for (std::size_t k = nc/2+1; k < nc; k++) out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]); for (std::size_t j = nc/2+1; j < nc; j++) out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]); } octave_quit (); // Now do the permutations needed for rank > 2 cases. std::size_t jstart = dv(0) * dv(1); std::size_t kstep = dv(0); std::size_t nel = dv.numel (); for (int inner = 2; inner < dv.ndims (); inner++) { std::size_t jmax = jstart * dv(inner); for (std::size_t i = 0; i < nel; i+=jmax) for (std::size_t j = jstart, jj = jmax-jstart; j < jj; j+=jstart, jj-=jstart) for (std::size_t k = 0; k < jstart; k+= kstep) for (std::size_t l = nc/2+1; l < nc; l++) { T tmp = out[i+ j + k + l]; out[i + j + k + l] = out[i + jj + k + l]; out[i + jj + k + l] = tmp; } jstart = jmax; } octave_quit (); } int fftw::fft (const double *in, Complex *out, std::size_t npts, std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) { dist = (dist < 0 ? npts : dist); dim_vector dv (npts, 1); void *vplan = fftw_planner::create_plan (1, dv, nsamples, stride, dist, in, out); fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)), reinterpret_cast<fftw_complex *> (out)); // Need to create other half of the transform. convert_packcomplex_1d (out, nsamples, npts, stride, dist); return 0; } int fftw::fft (const Complex *in, Complex *out, std::size_t npts, std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) { dist = (dist < 0 ? 
npts : dist); dim_vector dv (npts, 1); void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv, nsamples, stride, dist, in, out); fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); fftw_execute_dft (m_plan, reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), reinterpret_cast<fftw_complex *> (out)); return 0; } int fftw::ifft (const Complex *in, Complex *out, std::size_t npts, std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) { dist = (dist < 0 ? npts : dist); dim_vector dv (npts, 1); void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples, stride, dist, in, out); fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); fftw_execute_dft (m_plan, reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), reinterpret_cast<fftw_complex *> (out)); const Complex scale = npts; for (std::size_t j = 0; j < nsamples; j++) for (std::size_t i = 0; i < npts; i++) out[i*stride + j*dist] /= scale; return 0; } int fftw::fftNd (const double *in, Complex *out, const int rank, const dim_vector& dv) { octave_idx_type dist = 1; for (int i = 0; i < rank; i++) dist *= dv(i); // Fool with the position of the start of the output matrix, so that // creating other half of the matrix won't cause cache problems. octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist, in, out + offset); fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); fftw_execute_dft_r2c (m_plan, (const_cast<double *>(in)), reinterpret_cast<fftw_complex *> (out+ offset)); // Need to create other half of the transform. 
convert_packcomplex_Nd (out, dv); return 0; } int fftw::fftNd (const Complex *in, Complex *out, const int rank, const dim_vector& dv) { octave_idx_type dist = 1; for (int i = 0; i < rank; i++) dist *= dv(i); void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv, 1, 1, dist, in, out); fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); fftw_execute_dft (m_plan, reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), reinterpret_cast<fftw_complex *> (out)); return 0; } int fftw::ifftNd (const Complex *in, Complex *out, const int rank, const dim_vector& dv) { octave_idx_type dist = 1; for (int i = 0; i < rank; i++) dist *= dv(i); void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, 1, 1, dist, in, out); fftw_plan m_plan = reinterpret_cast<fftw_plan> (vplan); fftw_execute_dft (m_plan, reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), reinterpret_cast<fftw_complex *> (out)); const std::size_t npts = dv.numel (); const Complex scale = npts; for (std::size_t i = 0; i < npts; i++) out[i] /= scale; return 0; } int fftw::fft (const float *in, FloatComplex *out, std::size_t npts, std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) { dist = (dist < 0 ? npts : dist); dim_vector dv (npts, 1); void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride, dist, in, out); fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)), reinterpret_cast<fftwf_complex *> (out)); // Need to create other half of the transform. convert_packcomplex_1d (out, nsamples, npts, stride, dist); return 0; } int fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts, std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) { dist = (dist < 0 ? 
npts : dist); dim_vector dv (npts, 1); void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv, nsamples, stride, dist, in, out); fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); fftwf_execute_dft (m_plan, reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), reinterpret_cast<fftwf_complex *> (out)); return 0; } int fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts, std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) { dist = (dist < 0 ? npts : dist); dim_vector dv (npts, 1); void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples, stride, dist, in, out); fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); fftwf_execute_dft (m_plan, reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), reinterpret_cast<fftwf_complex *> (out)); const FloatComplex scale = npts; for (std::size_t j = 0; j < nsamples; j++) for (std::size_t i = 0; i < npts; i++) out[i*stride + j*dist] /= scale; return 0; } int fftw::fftNd (const float *in, FloatComplex *out, const int rank, const dim_vector& dv) { octave_idx_type dist = 1; for (int i = 0; i < rank; i++) dist *= dv(i); // Fool with the position of the start of the output matrix, so that // creating other half of the matrix won't cause cache problems. octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist, in, out + offset); fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); fftwf_execute_dft_r2c (m_plan, (const_cast<float *>(in)), reinterpret_cast<fftwf_complex *> (out+ offset)); // Need to create other half of the transform. 
convert_packcomplex_Nd (out, dv); return 0; } int fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, const dim_vector& dv) { octave_idx_type dist = 1; for (int i = 0; i < rank; i++) dist *= dv(i); void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv, 1, 1, dist, in, out); fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); fftwf_execute_dft (m_plan, reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), reinterpret_cast<fftwf_complex *> (out)); return 0; } int fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, const dim_vector& dv) { octave_idx_type dist = 1; for (int i = 0; i < rank; i++) dist *= dv(i); void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, 1, 1, dist, in, out); fftwf_plan m_plan = reinterpret_cast<fftwf_plan> (vplan); fftwf_execute_dft (m_plan, reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), reinterpret_cast<fftwf_complex *> (out)); const std::size_t npts = dv.numel (); const FloatComplex scale = npts; for (std::size_t i = 0; i < npts; i++) out[i] /= scale; return 0; } #endif std::string fftw_version (void) { #if defined (HAVE_FFTW) return ::fftw_version; #else return "none"; #endif } std::string fftwf_version (void) { #if defined (HAVE_FFTW) return ::fftwf_version; #else return "none"; #endif } }